diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..3f060b5
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,53 @@
+cmake_minimum_required(VERSION 2.8)
+
+project (hsa_conformance C)
+
+## Windows is currently not supported. Disable windows builds.
+if(WIN32)
+  MESSAGE("-------- Windows build is not supported.")
+  RETURN()
+endif()
+
+## Specify the location of the configuration files.
+set (CMAKE_CONFIG_DIR "${CMAKE_SOURCE_DIR}/cmake")
+
+## Set the CMAKE module path to the CMAKE_CONFIG_DIR.
+set (CMAKE_MODULE_PATH ${CMAKE_CONFIG_DIR})
+
+## Disable RPATH options
+set (CMAKE_SKIP_RPATH TRUE)
+
+## Determine if the required packages are installed.
+find_package(Check REQUIRED)
+include_directories(${CHECK_INCLUDE_DIR})
+
+find_package(HSA REQUIRED)
+include_directories(${HSA_RUNTIME_INCLUDE_DIR})
+
+## Include common settings.
+include (common)
+
+## Create the test utilities library.
+include (utils)
+
+## Enable testing.
+enable_testing()
+
+## Build the tests.
+include (kernel)
+include (script)
+include (api)
+include (agent)
+include (aql)
+include (async)
+include (code)
+include (init)
+include (memory)
+include (atomics)
+include (queue)
+include (signals)
+include (ext_api)
+include (finalization)
+include (image_clear)
+include (image_copy)
+include (image_import_export)
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..1934f27
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,38 @@
+The University of Illinois/NCSA
+Open Source License (NCSA)
+
+Copyright (c) 2014, Advanced Micro Devices, Inc.
+All rights reserved.
+
+Developed by:
+
+    AMD Research and AMD HSA Software Development
+
+    Advanced Micro Devices, Inc.
+ + www.amd.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal with the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + + - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in + the documentation and/or other materials provided with the distribution. + - Neither the names of , + nor the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior written + permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS WITH THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..d85ab66 --- /dev/null +++ b/README.md @@ -0,0 +1,129 @@ +USING THE RUNTIME CONFORMANCE SUITE + +The HSA Runtime Conformance Suite build environment utilizes the cmake +for automatic makefile generation targeting specific platforms. +The required version of cmake is version 2.8. The build & execution +environment requires the check test framework, version 0.9.12 or later, +and the 1.0 Final HSA runtime. 
+ +In normal builds, CMake automatically determines the toolchain for host +builds based on system introspection and defaults. In cross-compiling +scenarios, a toolchain file may be specified with information about compiler +and utility paths. Please consult CMake documentation for more information. + +Linux Environment Setup + +The following steps will install the appropriate versions of +cmake, check and HSA in the default PATH and LD_LIBRARY_PATH directories. +Consult the appropriate documentation regarding installing the +executables and libraries in different locations. + + 1) Install the appropriate level of cmake on your system. The latest version + of cmake can be obtained from http://www.cmake.org/download/, where both + binary and source distributions are available. On ubuntu the cmake package + may come as a pre-installed package, but the following command will install + the default version for the current system: + + 'sudo apt-get install cmake' + + 2) Install the appropriate version of the check framework on the + build system. The check framework can be downloaded from + http://sourceforge.net/projects/check/files/latest/download. On ubuntu the + following command will install the default version of check on the + current system: + + 'sudo apt-get install check' + + 3) If the HSA runtime isn't installed, get the desired version of the runtime + from https://github.com/HSAFoundation/HSA-Runtime-AMD. Install the runtime + from either the fedora rpm or the ubuntu deb package provided in the repository. + This will install the runtime in the /opt/hsa directory. + +Windows Environment Setup + +The Runtime conformance suite can only be built on Windows if the Cygwin development +environment is available. To install the required Cygwin resources open the +link https://cygwin.com/setup-x86_64.exe. This will download the Cygwin installer +to the local system. Launch the Cygwin installer and install the entire Devel +category.
Not all of the packages in Devel are required for Runtime conformance, +but it is safer and easier to install them all than to manually select the (several) +required components. + +BUILDING THE CONFORMANCE SUITE + +Before building the suite the locations of the HSA libraries and header files +must be specified using the HSA_INCLUDE_DIR and the HSA_LIBRARY_DIR cmake +variables. This is only required if the HSA headers and libraries aren't installed +in the standard system directories or in the default /opt/hsa location. For +example, if HSA is installed for a local user, the cmake variables should +be specified on the cmake command line like this: + + `cmake -D HSA_INCLUDE_DIR:STRING=/home/<user>/hsa/include + -D HSA_LIBRARY_DIR=/home/<user>/hsa/lib CMakeLists.txt` + +On a Windows system the cmake command should be executed in a Cygwin64 terminal shell. +Also on Windows, the HSA_INCLUDE_DIR and HSA_LIBRARY_DIR cmake variables must +be set; there are no valid default locations. The HSA_LIBRARY_DIR variable +should point to the directory that contains the hsa-runtime64.dll file, not the +associated hsa-runtime64.lib file. + +To build the suite, create a build directory and run cmake on the CMakeLists.txt +file. After the make infrastructure is created, build the binaries with the +make command. The following sequence of commands, if run from the top level +directory, would build the conformance suite: + + `mkdir build && cd build && cmake <-D ...> .. && make` + +INSTALLING THE CONFORMANCE SUITE + +The conformance suite can be installed in a directory by issuing the `make install` +command. The default installation directory is '/usr/local/hsa_conformance'. This +can be changed to another location using the `cmake -D CMAKE_INSTALL_PREFIX=<path> ..` +option. + +The user must have proper access to the install directory to both install +and execute the tests.
+ +RUNNING THE TESTS USING CMAKE + +Before running the tests the LD_LIBRARY_PATH environment variable must include +the path of the HSA runtime libraries. + +The HSA conformance tests can use the ctest execution environment. After building the +conformance suite, all currently enabled tests can be run by building the 'test' +target: + + `make test` + +RUNNING THE TESTS USING EXECUTION SCRIPTS + +It is also possible to use the execute.sh script provided in the installation to +run the test suite. First, install the test suite: + + `make install` + +This will transfer all of the tests, brig files and execution scripts to the install +directory (/usr/local/hsa_conformance is the default install directory). Change to +the install directory and run the following command to execute the conformance suite: + + `execute.sh test.lst` + +FREQUENTLY ASKED QUESTIONS + + Q1: When debugging a test case with gdb I can't step into the test functions. How do + I enable debugging? + A1: By default the check test framework will fork a new process for each test and + determine if it passes or fails by the code returned to the parent process. This allows + the parent process to remain insulated from signals (SIGSEGV) that are sent to the + child process. On most systems, GDB has no special support for debugging programs + which create additional processes using the fork function. When a program forks, GDB will + continue to debug the parent process and the child process will run unimpeded. This forking + behavior can be turned off by setting the CK_FORK environment variable to 'no', e.g. + "export CK_FORK=no". + + Q2: Check appears to be generating a segfault when I run a test: + ../../src/check_pack.c:312: ../../src/check_msg.c:75: No messaging setup Segmentation fault (core dumped) + + A2: The check assertion system isn't particularly robust when it comes to multiple threads, especially if + CK_FORK=no is set.
This segfault usually indicates that a multi-threaded test case is failing and + several of the threads are generating an ASSERT message concurrently. diff --git a/cmake/FindCheck.cmake b/cmake/FindCheck.cmake new file mode 100644 index 0000000..55b8591 --- /dev/null +++ b/cmake/FindCheck.cmake @@ -0,0 +1,28 @@ +if (CHECK_INCLUDE_DIR) + ## The check information is already in the cache. + set (CHECK_FIND_QUIETLY TRUE) +endif (CHECK_INCLUDE_DIR) + +## If the CHECK_INSTALL_DIR cmake variable is set, +## add it to the list of system directories to search +## for the check library. This is done in the find_path +## and find_library calls below. + +## Look for the check include file. +find_path (CHECK_INCLUDE_DIR NAMES check.h ${CHECK_INSTALL_DIR}/include) +## Look for the check library. +find_library (CHECK_LIBRARY NAMES check ${CHECK_INSTALL_DIR}/lib) + +## Handle the QUIETLY and REQUIRED arguments and set CHECK_FOUND to TRUE if +## all listed variables are TRUE. +include (FindPackageHandleStandardArgs) +find_package_handle_standard_args (Check "Please install 'check' and 'check-devel' packages" CHECK_LIBRARY CHECK_INCLUDE_DIR) + +if (CHECK_FOUND) + set (CHECK_LIBRARIES ${CHECK_LIBRARY}) +else (CHECK_FOUND) + set (CHECK_LIBRARIES) +endif(CHECK_FOUND) + +mark_as_advanced (CHECK_INCLUDE_DIR) +mark_as_advanced (CHECK_LIBRARY) diff --git a/cmake/FindELF.cmake b/cmake/FindELF.cmake new file mode 100644 index 0000000..7662f7b --- /dev/null +++ b/cmake/FindELF.cmake @@ -0,0 +1,23 @@ +if (ELF_INCLUDE_DIR) + ## The elf information is already in the cache. + set (ELF_FIND_QUIETLY TRUE) +endif (ELF_INCLUDE_DIR) + +## Look for the check include file. +find_path (ELF_INCLUDE_DIR NAMES libelf.h) +## Look for the check library. +find_library (ELF_LIBRARY NAMES elf) + +## Handle the QUIETLY and REQUIRED arguments and set ELF_FOUND to TRUE if +## all listed variables are TRUE. 
+include (FindPackageHandleStandardArgs)
+find_package_handle_standard_args (ELF "Please install the 'elfutils' package" ELF_LIBRARY ELF_INCLUDE_DIR)
+
+if (ELF_FOUND)
+  set (ELF_LIBRARIES ${ELF_LIBRARY})
+else (ELF_FOUND)
+  set (ELF_LIBRARIES)
+endif(ELF_FOUND)
+
+mark_as_advanced (ELF_INCLUDE_DIR)
+mark_as_advanced (ELF_LIBRARY)
diff --git a/cmake/FindHSA.cmake b/cmake/FindHSA.cmake
new file mode 100644
index 0000000..c7bccb1
--- /dev/null
+++ b/cmake/FindHSA.cmake
@@ -0,0 +1,59 @@
+if (HSA_RUNTIME_INCLUDE_DIR)
+  ## The HSA information is already in the cache; FindPackageHandleStandardArgs reads <Name>_FIND_QUIETLY.
+  set (HSA_FIND_QUIETLY TRUE)
+endif (HSA_RUNTIME_INCLUDE_DIR)
+
+## Look for the hsa include file path.
+
+## If the HSA_INCLUDE_DIR cmake variable is set,
+## use it as the search path for HSA_RUNTIME_INCLUDE_DIR.
+## Otherwise set the value to /opt/hsa/include.
+## Note that this can be set when running cmake
+## by specifying -D HSA_INCLUDE_DIR=<path>.
+
+if(NOT DEFINED HSA_INCLUDE_DIR)
+  set (HSA_INCLUDE_DIR "/opt/hsa/include")
+endif()
+
+MESSAGE("HSA_INCLUDE_DIR=${HSA_INCLUDE_DIR}")
+
+find_path (HSA_RUNTIME_INCLUDE_DIR NAMES hsa.h PATHS ${HSA_INCLUDE_DIR})
+
+## If the HSA_LIBRARY_DIR cmake variable is set,
+## use it as the search path for the HSA runtime library.
+## Otherwise set the value to /opt/hsa/lib.
+## Note that this can be set when running cmake
+## by specifying -D HSA_LIBRARY_DIR=<path>.
+
+if(NOT DEFINED HSA_LIBRARY_DIR)
+  set (HSA_LIBRARY_DIR "/opt/hsa/lib")
+endif()
+
+MESSAGE("HSA_LIBRARY_DIR=${HSA_LIBRARY_DIR}")
+
+## Look for the hsa library and, if found, generate the directory.
+if(DEFINED CYGWIN)
+  ## In CYGWIN set the library name directly to the hsa-runtime64.dll.
+  ## This is a temporary work-around for cmake limitations, and requires
+  ## that the HSA_LIBRARY_DIR cmake variable is set by the user.
+  set(HSA_RUNTIME_LIBRARY "${HSA_LIBRARY_DIR}/hsa-runtime64.dll")
+else()
+  find_library (HSA_RUNTIME_LIBRARY NAMES hsa-runtime64 PATHS ${HSA_LIBRARY_DIR})
+endif()
+
+get_filename_component(HSA_RUNTIME_LIBRARY_DIR "${HSA_RUNTIME_LIBRARY}" PATH)
+
+## Handle the QUIETLY and REQUIRED arguments and set HSA_FOUND to TRUE if
+## all listed variables are TRUE.
+include (FindPackageHandleStandardArgs)
+find_package_handle_standard_args (HSA "Please install 'hsa-runtime' package" HSA_RUNTIME_LIBRARY HSA_RUNTIME_INCLUDE_DIR)
+
+if (HSA_FOUND)
+  set (HSA_LIBRARIES ${HSA_RUNTIME_LIBRARY})
+else (HSA_FOUND)
+  set (HSA_LIBRARIES)
+endif(HSA_FOUND)
+
+mark_as_advanced (HSA_RUNTIME_INCLUDE_DIR)
+mark_as_advanced (HSA_RUNTIME_LIBRARY_DIR)
+mark_as_advanced (HSA_RUNTIME_LIBRARY)
diff --git a/cmake/agent.cmake b/cmake/agent.cmake
new file mode 100644
index 0000000..743ccf2
--- /dev/null
+++ b/cmake/agent.cmake
@@ -0,0 +1,14 @@
+## Target executable name.
+set (TARGET hsa_agent)
+
+## Specify the SRC_DIR.
+set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/agent")
+
+## Included source files.
+set (SOURCE_FILES hsa_agent.c test_concurrent_iterate.c test_concurrent_query.c test_iterate_null_data.c test_iterate_terminate.c test_query_attributes.c test_query_system_attributes.c)
+
+## Test list.
+set (TEST_LIST iterate_null_data iterate_terminate iterate_query_attributes concurrent_query concurrent_iterate query_system_attributes)
+
+include (build)
+include (test)
diff --git a/cmake/api.cmake b/cmake/api.cmake
new file mode 100644
index 0000000..2ba0a0c
--- /dev/null
+++ b/cmake/api.cmake
@@ -0,0 +1,15 @@
+## Target executable name.
+set (TARGET hsa_api)
+
+## Specify the SRC_DIR.
+set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/api")
+
+## Included source files.
+set (SOURCE_FILES test_helper_func.c hsa_api.c test_hsa_agent_get_info.c test_hsa_agent_iterate_regions.c test_hsa_agent_get_exception_policies.c test_hsa_init.c test_hsa_iterate_agents.c test_hsa_memory_allocate.c test_hsa_memory_deregister.c test_hsa_memory_free.c test_hsa_memory_register.c test_hsa_queue_add_write_index_acq_rel.c test_hsa_queue_add_write_index_acquire.c test_hsa_queue_add_write_index_relaxed.c test_hsa_queue_add_write_index_release.c test_hsa_queue_cas_write_index_acq_rel.c test_hsa_queue_cas_write_index_acquire.c test_hsa_queue_cas_write_index_relaxed.c test_hsa_queue_cas_write_index_release.c test_hsa_queue_create.c test_hsa_queue_destroy.c test_hsa_queue_inactivate.c test_hsa_queue_load_read_index_acquire.c test_hsa_queue_load_read_index_relaxed.c test_hsa_queue_load_store_write_index_acquire_relaxed.c test_hsa_queue_load_store_write_index_relaxed_release.c test_hsa_region_get_info.c test_hsa_shut_down.c test_hsa_signal_add_acq_rel.c test_hsa_signal_add_acquire.c test_hsa_signal_add_relaxed.c test_hsa_signal_add_release.c test_hsa_signal_and_acq_rel.c test_hsa_signal_and_acquire.c test_hsa_signal_and_relaxed.c test_hsa_signal_and_release.c test_hsa_signal_cas_acq_rel.c test_hsa_signal_cas_acquire.c test_hsa_signal_cas_relaxed.c test_hsa_signal_cas_release.c test_hsa_signal_create.c test_hsa_signal_destroy.c test_hsa_signal_exchange_acq_rel.c test_hsa_signal_exchange_acquire.c test_hsa_signal_exchange_relaxed.c test_hsa_signal_exchange_release.c test_hsa_signal_load_acquire.c test_hsa_signal_load_relaxed.c test_hsa_signal_or_acq_rel.c test_hsa_signal_or_acquire.c test_hsa_signal_or_relaxed.c test_hsa_signal_or_release.c test_hsa_signal_store_relaxed.c test_hsa_signal_store_release.c test_hsa_signal_subtract_acq_rel.c test_hsa_signal_subtract_acquire.c test_hsa_signal_subtract_relaxed.c test_hsa_signal_subtract_release.c test_hsa_signal_xor_acq_rel.c test_hsa_signal_xor_acquire.c test_hsa_signal_xor_relaxed.c test_hsa_signal_xor_release.c 
test_hsa_status_string.c test_hsa_system_get_info.c test_hsa_system_extension_supported.c test_hsa_system_get_extension_table.c test_hsa_agent_extension_supported.c test_hsa_isa_get_info.c test_hsa_isa_compatible.c test_hsa_code_object_get_info.c test_hsa_code_symbol_get_info.c test_hsa_executable_create.c test_hsa_executable_destroy.c test_hsa_executable_load_code_object.c test_hsa_executable_get_info.c test_hsa_executable_symbol_get_info.c test_hsa_soft_queue_create.c test_hsa_isa_from_name.c +) + +## Test list. +set (TEST_LIST hsa_init hsa_shut_down hsa_shut_down_not_initialized hsa_shut_down_after_shut_down hsa_status_string hsa_status_string_not_initialized hsa_status_string_invalid_status hsa_status_string_invalid_ptr hsa_system_get_info hsa_system_get_info_not_initialized hsa_system_get_info_invalid_attribute hsa_system_get_info_invalid_ptr hsa_system_extension_supported hsa_system_extension_supported_not_initialized hsa_system_extension_supported_invalid_extension hsa_system_extension_supported_null_result_ptr hsa_system_get_extension_table hsa_system_get_extension_table_not_initialized hsa_system_get_extension_table_invalid_extension hsa_system_get_extension_table_null_table_ptr hsa_agent_get_info hsa_agent_get_info_not_initialized hsa_agent_get_info_invalid_agent hsa_agent_get_info_invalid_attribute hsa_agent_get_info_invalid_ptr hsa_iterate_agents hsa_iterate_agents_not_initialized hsa_iterate_agents_invalid_callback hsa_agent_get_exception_policies hsa_agent_get_exception_policies_not_initialized hsa_agent_get_exception_policies_invalid_agent hsa_agent_get_exception_policies_null_mask_ptr hsa_agent_get_exception_policies_invalid_profile hsa_agent_extension_supported hsa_agent_extension_supported_not_initialized hsa_agent_extension_supported_invalid_agent hsa_agent_extension_supported_invalid_extension hsa_agent_extension_supported_null_result_ptr hsa_signal_create hsa_signal_create_not_initialized hsa_signal_create_null_signal 
hsa_signal_create_invalid_arg hsa_signal_destroy hsa_signal_load_acquire hsa_signal_load_relaxed hsa_signal_store_release hsa_signal_store_relaxed hsa_signal_exchange_acq_rel hsa_signal_exchange_acquire hsa_signal_exchange_relaxed hsa_signal_exchange_release hsa_signal_cas_acq_rel hsa_signal_cas_acquire hsa_signal_cas_relaxed hsa_signal_cas_release hsa_signal_add_acq_rel hsa_signal_add_acquire hsa_signal_add_relaxed hsa_signal_add_release hsa_signal_subtract_acq_rel hsa_signal_subtract_acquire hsa_signal_subtract_relaxed hsa_signal_subtract_release hsa_signal_and_acq_rel hsa_signal_and_acquire hsa_signal_and_relaxed hsa_signal_and_release hsa_signal_or_acq_rel hsa_signal_or_acquire hsa_signal_or_relaxed hsa_signal_or_release hsa_signal_xor_acq_rel hsa_signal_xor_acquire hsa_signal_xor_relaxed hsa_signal_xor_release hsa_queue_create hsa_queue_create_not_initialized hsa_queue_create_invalid_agent hsa_queue_create_invalid_queue_creation hsa_queue_create_invalid_argument hsa_queue_destroy hsa_queue_destroy_not_initialized hsa_queue_destroy_invalid_queue hsa_queue_destroy_invalid_argument hsa_queue_inactivate hsa_queue_inactivate_not_initialized hsa_queue_inactivate_invalid_queue hsa_queue_inactivate_invalid_argument hsa_queue_load_read_index_acquire hsa_queue_load_read_index_relaxed hsa_queue_load_store_write_index_acquire_relaxed hsa_queue_load_store_write_index_relaxed_release hsa_queue_cas_write_index_acq_rel hsa_queue_cas_write_index_acquire hsa_queue_cas_write_index_relaxed hsa_queue_cas_write_index_release hsa_queue_add_write_index_acq_rel hsa_queue_add_write_index_acquire hsa_queue_add_write_index_relaxed hsa_queue_add_write_index_release hsa_memory_allocate hsa_memory_allocate_not_initialized hsa_memory_allocate_null_ptr hsa_memory_allocate_zero_size hsa_memory_allocate_invalid_allocation hsa_memory_allocate_invalid_region hsa_memory_free hsa_memory_free_not_initialized hsa_memory_register hsa_memory_register_not_initialized hsa_memory_register_null_ptr 
hsa_memory_register_zero_size hsa_memory_deregister hsa_memory_deregister_not_initialized hsa_region_get_info hsa_region_get_info_not_initialized hsa_region_get_info_invalid_region hsa_region_get_info_invalid_argument hsa_agent_iterate_regions hsa_agent_iterate_regions_not_initialized hsa_agent_iterate_regions_invalid_argument hsa_agent_iterate_regions_invalid_agent hsa_isa_from_name hsa_isa_from_name_null_name hsa_isa_from_name_null_isa hsa_isa_from_name_invalid_isa_name hsa_isa_get_info hsa_isa_get_info_not_initialized hsa_isa_get_info_invalid_isa hsa_isa_get_info_index_out_of_range hsa_isa_get_info_invalid_attribute hsa_isa_get_info_null_value hsa_isa_compatible hsa_isa_compatible_invalid_isa hsa_isa_compatible_null_result) + +include (build) +include (test) diff --git a/cmake/aql.cmake b/cmake/aql.cmake new file mode 100644 index 0000000..df67c90 --- /dev/null +++ b/cmake/aql.cmake @@ -0,0 +1,14 @@ +## Target executable name. +set (TARGET hsa_aql) + +## Specify the SRC_DIR. +set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/aql") + +## Included source files. +set (SOURCE_FILES hsa_aql.c test_aql_barrier_and.c test_aql_barrier_bit_not_set.c test_aql_barrier_bit_set.c test_aql_barrier_cross_queue_dependency.c test_aql_barrier_cross_queue_dependency_negative_value.c test_aql_barrier_multiple_barriers.c test_aql_barrier_negative_value.c test_aql_barrier_or.c test_aql_group_memory.c test_aql_group_memory_overspecified.c test_aql_launch_size.c test_aql_private_memory.c test_aql_private_memory_overspecified.c test_helper_func.c test_aql_zero_wg_size.c) + +## Test list. 
+set (TEST_LIST aql_launch_size aql_barrier_bit_not_set aql_barrier_bit_set aql_barrier_cross_queue_dependency aql_barrier_cross_queue_dependency_negative_value aql_barrier_multiple_barriers aql_group_memory aql_group_memory_overspecified aql_private_memory aql_private_memory_overspecified aql_barrier_and aql_barrier_or aql_zero_wg_size) + +include (build) +include (test) diff --git a/cmake/async.cmake b/cmake/async.cmake new file mode 100644 index 0000000..8402321 --- /dev/null +++ b/cmake/async.cmake @@ -0,0 +1,14 @@ +## Target executable name. +set (TARGET hsa_async) + +## Specify the SRC_DIR. +set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/async") + +## Included source files. +set (SOURCE_FILES hsa_async.c test_async_utils.c test_async_invalid_group_memory.c test_async_invalid_packet.c test_async_invalid_dimensions.c test_async_invalid_kernel_object.c test_async_invalid_workgroup_size.c) + +## Test list. +set (TEST_LIST async_invalid_group_memory async_invalid_dimensions async_invalid_kernel_object) + +include (build) +include (test) diff --git a/cmake/atomics.cmake b/cmake/atomics.cmake new file mode 100644 index 0000000..dabf6db --- /dev/null +++ b/cmake/atomics.cmake @@ -0,0 +1,14 @@ +## Target executable name. +set (TARGET hsa_memory_atomics) + +## Specify the SRC_DIR. +set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/memory/atomics") + +## Source files. +set (SOURCE_FILES hsa_memory_atomics.c test_helper_func.c test_memory_add_atomic.c test_memory_and_atomic.c test_memory_cas_atomic.c test_memory_decrement_atomic.c test_memory_exchange_atomic.c test_memory_increment_atomic.c test_memory_load_store_atomic.c test_memory_maximum_atomic.c test_memory_minimum_atomic.c test_memory_or_atomic.c test_memory_subtract_atomic.c test_memory_xor_atomic.c) + +## Test list. 
+set (TEST_LIST memory_add_atomic memory_and_atomic memory_cas_atomic memory_decrement_atomic memory_exchange_atomic memory_increment_atomic memory_load_store_atomic memory_maximum_atomic memory_minimum_atomic memory_or_atomic memory_subtract_atomic memory_xor_atomic)
+
+include (build)
+include (test)
diff --git a/cmake/build.cmake b/cmake/build.cmake
new file mode 100644
index 0000000..0a9886d
--- /dev/null
+++ b/cmake/build.cmake
@@ -0,0 +1,20 @@
+## Build the list of source files.
+set (SOURCE_LIST)
+
+foreach (SOURCE_FILE ${SOURCE_FILES})
+
+  set (SOURCE_LIST ${SOURCE_LIST} ${SRC_DIR}/${SOURCE_FILE})
+
+endforeach ()
+
+## Specify the link directories.
+link_directories (${LIBRARY_LINK_DIRS})
+
+## Add the executable.
+add_executable (${TARGET} ${SOURCE_LIST})
+
+## Specify the link targets.
+target_link_libraries (${TARGET} ${TEST_LIBRARIES} ${SYSTEM_LIBRARIES} ${HSA_LIBRARIES})
+
+## Install the built executable; install(TARGETS) resolves the real output path and platform suffix (e.g. .exe).
+install (TARGETS ${TARGET} RUNTIME DESTINATION ${INSTALL_DIR})
diff --git a/cmake/buildlib.cmake b/cmake/buildlib.cmake
new file mode 100644
index 0000000..172013c
--- /dev/null
+++ b/cmake/buildlib.cmake
@@ -0,0 +1,9 @@
+## Build the list of source files, starting from an empty list as build.cmake does.
+set (SOURCE_LIST)
+
+foreach (SOURCE_FILE ${SOURCE_FILES})
+  set (SOURCE_LIST ${SOURCE_LIST} ${SRC_DIR}/${SOURCE_FILE})
+endforeach ()
+
+## Add the library.
+add_library (${TARGET} STATIC ${SOURCE_LIST})
diff --git a/cmake/code.cmake b/cmake/code.cmake
new file mode 100644
index 0000000..30a3bac
--- /dev/null
+++ b/cmake/code.cmake
@@ -0,0 +1,14 @@
+## Target executable name.
+set (TARGET hsa_code)
+
+## Specify the SRC_DIR.
+set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/code")
+
+## Included source files.
+set (SOURCE_FILES hsa_code.c test_helper_func.c test_code_define_global_agent.c test_code_define_global_program.c test_code_define_readonly_agent.c test_code_mixed_scope.c test_code_module_scope_symbol.c test_code_program_scope_symbol.c test_code_multiple_executables.c test_code_serialize_deserialize.c test_code_iterate_symbols.c test_code_kernarg_alignment.c test_code_recursive_kernel_function.c)
+
+## Test list. NOTE(review): first entry was spelled code_define_globaL_agent; confirm the name registered in hsa_code.c.
+set (TEST_LIST code_define_global_agent code_define_global_program code_define_readonly_agent code_mixed_scope code_module_scope_symbol code_program_scope_symbol code_multiple_executables code_serialize_deserialize code_iterate_symbols code_kernarg_alignment code_recursive_kernel_function)
+
+include (build)
+include (test)
diff --git a/cmake/common.cmake b/cmake/common.cmake
new file mode 100644
index 0000000..e059722
--- /dev/null
+++ b/cmake/common.cmake
@@ -0,0 +1,40 @@
+## Set the value of LIBRARY_LINK_DIRS.
+set (LIBRARY_LINK_DIRS ${HSA_RUNTIME_LIBRARY_DIR})
+
+## Name of the utilities library.
+set (UTILS_LIBRARY hsa_utils)
+
+## Test libraries.
+set (TEST_LIBRARIES ${UTILS_LIBRARY})
+
+## HSA libraries.
+set (HSA_LIBRARIES ${HSA_RUNTIME_LIBRARY})
+
+## System libraries.
+set (SYSTEM_LIBRARIES check rt m pthread)
+
+## Coding standard used.
+set (C_STANDARD "-std=c99")
+
+## Include directories compilation command.
+set (INCLUDE_DIRS "-I ${CMAKE_SOURCE_DIR}/src/framework -I ${CMAKE_SOURCE_DIR}/src/utils -I ${HSA_RUNTIME_INCLUDE_DIR}")
+MESSAGE("-- INCLUDE_DIRS ${INCLUDE_DIRS}")
+
+## C flags. Append to any flags supplied by the user or a toolchain file instead of discarding them.
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_STANDARD} ${INCLUDE_DIRS}")
+MESSAGE("-- CMAKE_C_FLAGS ${CMAKE_C_FLAGS}")
+
+## Link flags. Append to any flags supplied by the user or a toolchain file instead of discarding them.
+set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--unresolved-symbols=ignore-in-shared-libs")
+MESSAGE("-- CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS}")
+
+## Execution script to use for testing.
+## Default to run.sh.
+if(NOT DEFINED EXECUTION_SCRIPT)
+  set (EXECUTION_SCRIPT "run.sh")
+endif()
+
+## Set the installation directory
+if(NOT DEFINED INSTALL_DIR)
+  set (INSTALL_DIR "hsa_conformance")
+endif()
diff --git a/cmake/ext_api.cmake b/cmake/ext_api.cmake
new file mode 100644
index 0000000..5acb261
--- /dev/null
+++ b/cmake/ext_api.cmake
@@ -0,0 +1,16 @@
+## Target executable name.
+set (TARGET hsa_ext_api)
+
+## Specify the SRC_DIR.
+set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/extensions/ext_api")
+
+## Included source files (every entry carries its explicit .c extension).
+set (SOURCE_FILES hsa_ext_api.c test_hsa_ext_program_create.c test_hsa_ext_program_finalize.c test_hsa_ext_program_destroy.c)
+
+## Test list.
+## NOTE: hsa_ext_program_destroy_invalid_program is enabled in the active list; the commented-out list below is the variant without it.
+set (TEST_LIST hsa_ext_program_create hsa_ext_program_create_not_initialized hsa_ext_program_create_invalid_argument hsa_ext_program_destroy hsa_ext_program_destroy_not_initialized hsa_ext_program_destroy_invalid_program hsa_ext_program_add_module hsa_ext_program_add_module_not_initialized hsa_ext_program_add_module_errors)
+##set (TEST_LIST hsa_ext_program_create hsa_ext_program_create_not_initialized hsa_ext_program_create_invalid_argument hsa_ext_program_destroy hsa_ext_program_destroy_not_initialized hsa_ext_program_add_module hsa_ext_program_add_module_not_initialized hsa_ext_program_add_module_errors)
+
+include (build)
+include (test)
diff --git a/cmake/finalization.cmake b/cmake/finalization.cmake
new file mode 100644
index 0000000..8b5e3a3
--- /dev/null
+++ b/cmake/finalization.cmake
@@ -0,0 +1,17 @@
+## Target executable name.
+set (TARGET hsa_finalization)
+
+## Specify the SRC_DIR.
+set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/extensions/finalization")
+
+## Included source files.
+set (SOURCE_FILES test_helper_func.c test_finalization_concurrent_finalization.c test_finalization_dependent_modules.c test_finalization_incompatible_module.c test_finalization_invalid_module.c test_finalization_module_already_included.c test_finalization_module_count.c test_finalization_multiple_modules.c test_finalization_out_of_resources.c test_finalization_control_directives_max_dynamic_group_size.c hsa_finalization.c test_finalization_control_directives_max_flat_grid_size.c test_finalization_control_directives_max_flat_workgroup_size.c test_finalization_control_directives_required_grid_size.c test_finalization_control_directives_required_workgroup_size.c test_finalization_control_directives_required_dim.c) + +## Disable control directive tests and invalid module test +## set (TEST_LIST finalization_concurrent_finalization finalization_dependent_modules finalization_incompatible_module finalization_invalid_module finalization_module_already_included finalization_module_count finalization_multiple_modules finalization_control_directives_max_dynamic_group_size finalization_control_directives_max_flat_grid_size finalization_control_directives_max_flat_workgroup_size finalization_control_directives_required_grid_size finalization_control_directives_required_workgroup_size finalization_control_directives_required_dim) + +## Test list. +set (TEST_LIST finalization_concurrent_finalization finalization_dependent_modules finalization_incompatible_module finalization_module_already_included finalization_module_count finalization_multiple_modules) + +include (build) +include (test) diff --git a/cmake/image_clear.cmake b/cmake/image_clear.cmake new file mode 100644 index 0000000..c3d86a1 --- /dev/null +++ b/cmake/image_clear.cmake @@ -0,0 +1,38 @@ +## Target executable name. +set (TARGET hsa_image_clear) + +## Specify the SRC_DIR. +set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/extensions/images/clear") + +## Included source files. 
+set (SOURCE_FILES hsa_image_clear.c test_image_clear.c)
+
+include (image_data)
+
+set (TEST_LIST "")
+
+set (VALID "")
+
+## Generate one test name for every channel type / channel order / geometry combination accepted by valid_image.
+foreach (CHANNEL_TYPE IN LISTS CHANNEL_TYPES)
+
+  foreach (CHANNEL_ORDER IN LISTS CHANNEL_ORDERS)
+
+    foreach (GEOMETRY IN LISTS GEOMETRIES)
+
+      valid_image(${CHANNEL_TYPE} ${CHANNEL_ORDER} ${GEOMETRY} VALID)
+
+      if (VALID)
+
+        list (APPEND TEST_LIST image_clear_${CHANNEL_TYPE}_${CHANNEL_ORDER}_${GEOMETRY})
+
+      endif ()
+
+    endforeach ()
+
+  endforeach ()
+
+endforeach ()
+
+include (build)
+include (test)
diff --git a/cmake/image_copy.cmake b/cmake/image_copy.cmake
new file mode 100644
index 0000000..c8466e4
--- /dev/null
+++ b/cmake/image_copy.cmake
@@ -0,0 +1,38 @@
+## Name of the executable to build.
+set (TARGET hsa_image_copy)
+
+## Directory holding the sources of this target.
+set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/extensions/images/copy")
+
+## Sources compiled into the executable.
+set (SOURCE_FILES hsa_image_copy.c test_image_copy.c)
+
+include (image_data)
+
+set (TEST_LIST "")
+
+set (VALID "")
+
+## Generate one test name for every channel type / channel order / geometry combination accepted by valid_image.
+foreach (CHANNEL_TYPE IN LISTS CHANNEL_TYPES)
+
+  foreach (CHANNEL_ORDER IN LISTS CHANNEL_ORDERS)
+
+    foreach (GEOMETRY IN LISTS GEOMETRIES)
+
+      valid_image(${CHANNEL_TYPE} ${CHANNEL_ORDER} ${GEOMETRY} VALID)
+
+      if (VALID)
+
+        list (APPEND TEST_LIST image_copy_${CHANNEL_TYPE}_${CHANNEL_ORDER}_${GEOMETRY})
+
+      endif ()
+
+    endforeach ()
+
+  endforeach ()
+
+endforeach ()
+
+include (build)
+include (test)
diff --git a/cmake/image_data.cmake b/cmake/image_data.cmake
new file mode 100644
index 0000000..b05a12f
--- /dev/null
+++ b/cmake/image_data.cmake
@@ -0,0 +1,97 @@
+set (CHANNEL_TYPES SNORM_INT8 SNORM_INT16 UNORM_INT8 UNORM_INT16 UNORM_INT24 UNORM_SHORT_555 UNORM_SHORT_565 UNORM_SHORT_101010 SIGNED_INT8 SIGNED_INT16 SIGNED_INT32 UNSIGNED_INT8 UNSIGNED_INT16 UNSIGNED_INT32 HALF_FLOAT FLOAT)
+
+## Testing for the SRGB, SRGBX, SRGBA and SBGRA channel orders is currently not enabled.
+## set (CHANNEL_ORDERS A R RX RG RGX RA RGB RGBX RGBA BGRA ARGB ABGR SRGB SRGBX SRGBA SBGRA INTENSITY LUMINANCE DEPTH DEPTH_STENCIL)
+set (CHANNEL_ORDERS A R RX RG RGX RA RGB RGBX RGBA BGRA ARGB ABGR INTENSITY LUMINANCE DEPTH DEPTH_STENCIL)
+
+set (GEOMETRIES 1D 2D 3D 1DA 2DA 1DB 2DDEPTH 2DADEPTH)
+## valid_image(<channel type> <channel order> <geometry> <out-var>): sets <out-var> in the caller's scope to TRUE if any rule below accepts the combination, otherwise FALSE.
+function(valid_image CHANNEL_TYPE CHANNEL_ORDER IMAGE_GEOMETRY VALID)
+  ## VALID holds the NAME of the caller's result variable, so it is dereferenced when written; this keeps working if a caller passes a name other than "VALID".
+  set (${VALID} FALSE PARENT_SCOPE)
+  ## Rule: integer and float channel types with the A/R/RX/RG/RGX/RA/RGBA orders on the non-depth geometries. GEOMETRY_MATCH computed here is reused by later rules until it is re-matched.
+  string(REGEX MATCH "^(SNORM|UNORM|SIGNED|UNSIGNED)_(INT8|INT16|INT32)$" CHANNEL_TYPE_MATCH ${CHANNEL_TYPE})
+  string(REGEX MATCH "^(HALF_FLOAT|FLOAT)$" CHANNEL_FLOAT_TYPE_MATCH ${CHANNEL_TYPE})
+  string(REGEX MATCH "^(A|R|RX|RG|RGX|RA|RGBA)$" CHANNEL_ORDER_MATCH ${CHANNEL_ORDER})
+  string(REGEX MATCH "^(1D|2D|3D|1DA|2DA|1DB)$" GEOMETRY_MATCH ${IMAGE_GEOMETRY})
+  if(CHANNEL_TYPE_MATCH OR CHANNEL_FLOAT_TYPE_MATCH)
+    if(CHANNEL_ORDER_MATCH)
+      if(GEOMETRY_MATCH)
+        set (${VALID} TRUE PARENT_SCOPE)
+      endif()
+    endif()
+  endif()
+  ## Rule: the packed UNORM_SHORT_* types pair only with the RGB and RGBX orders.
+  string(REGEX MATCH "^UNORM_SHORT_(555|565|101010)$" CHANNEL_TYPE_MATCH ${CHANNEL_TYPE})
+  string(REGEX MATCH "^(RGB|RGBX)$" CHANNEL_ORDER_MATCH ${CHANNEL_ORDER})
+  if(CHANNEL_TYPE_MATCH)
+    if(CHANNEL_ORDER_MATCH)
+      if(GEOMETRY_MATCH)
+        set (${VALID} TRUE PARENT_SCOPE)
+      endif()
+    endif()
+  endif()
+  ## Rule: the 8-bit channel types with the BGRA/ARGB/ABGR orders.
+  string(REGEX MATCH "^(UNORM|SNORM|SIGNED|UNSIGNED)_INT8$" CHANNEL_TYPE_MATCH ${CHANNEL_TYPE})
+  string(REGEX MATCH "^(BGRA|ARGB|ABGR)$" CHANNEL_ORDER_MATCH ${CHANNEL_ORDER})
+  if(CHANNEL_TYPE_MATCH)
+    if(CHANNEL_ORDER_MATCH)
+      if(GEOMETRY_MATCH)
+        set (${VALID} TRUE PARENT_SCOPE)
+      endif()
+    endif()
+  endif()
+  ## Rule: the sRGB orders require UNORM_INT8 (these orders are not in the active CHANNEL_ORDERS list above).
+  string(REGEX MATCH "^UNORM_INT8$" CHANNEL_TYPE_MATCH ${CHANNEL_TYPE})
+  string(REGEX MATCH "^(SRGB|SRGBX|SRGBA|SBGRA)$" CHANNEL_ORDER_MATCH ${CHANNEL_ORDER})
+  if(CHANNEL_TYPE_MATCH)
+    if(CHANNEL_ORDER_MATCH)
+      if(GEOMETRY_MATCH)
+        set (${VALID} TRUE PARENT_SCOPE)
+      endif()
+    endif()
+  endif()
+  ## Rule: INTENSITY with the 8/16-bit normalized types or the float types.
+  string(REGEX MATCH "^INTENSITY$" CHANNEL_ORDER_MATCH ${CHANNEL_ORDER})
+  string(REGEX MATCH "^(UNORM|SNORM)_(INT8|INT16)$" CHANNEL_TYPE_MATCH ${CHANNEL_TYPE})
+  string(REGEX MATCH "^(HALF_FLOAT|FLOAT)$" CHANNEL_FLOAT_TYPE_MATCH ${CHANNEL_TYPE})
+  if(CHANNEL_TYPE_MATCH OR CHANNEL_FLOAT_TYPE_MATCH)
+    if(CHANNEL_ORDER_MATCH)
+      if(GEOMETRY_MATCH)
+        set (${VALID} TRUE PARENT_SCOPE)
+      endif()
+    endif()
+  endif()
+  ## Rule: LUMINANCE. NOTE(review): CHANNEL_TYPE_MATCH is not re-matched here (the INTENSITY result is reused) and GEOMETRY_MATCH switches to the depth geometries from this point on, so LUMINANCE is only accepted with 2DDEPTH/2DADEPTH - confirm this is intended.
+  string(REGEX MATCH "^LUMINANCE$" CHANNEL_ORDER_MATCH ${CHANNEL_ORDER})
+  string(REGEX MATCH "^(HALF_FLOAT|FLOAT)$" CHANNEL_FLOAT_TYPE_MATCH ${CHANNEL_TYPE})
+  string(REGEX MATCH "^(2DDEPTH|2DADEPTH)$" GEOMETRY_MATCH ${IMAGE_GEOMETRY})
+  if(CHANNEL_TYPE_MATCH OR CHANNEL_FLOAT_TYPE_MATCH)
+    if(CHANNEL_ORDER_MATCH)
+      if(GEOMETRY_MATCH)
+        set (${VALID} TRUE PARENT_SCOPE)
+      endif()
+    endif()
+  endif()
+  ## Rule: DEPTH with UNORM_INT16/UNORM_INT24, using the depth-geometry match computed in the previous rule.
+  string(REGEX MATCH "^DEPTH$" CHANNEL_ORDER_MATCH ${CHANNEL_ORDER})
+  string(REGEX MATCH "^UNORM_(INT24|INT16)$" CHANNEL_TYPE_MATCH ${CHANNEL_TYPE})
+  if(CHANNEL_TYPE_MATCH)
+    if(CHANNEL_ORDER_MATCH)
+      if(GEOMETRY_MATCH)
+        set (${VALID} TRUE PARENT_SCOPE)
+      endif()
+    endif()
+  endif()
+  ## Rule: DEPTH_STENCIL with UNORM_INT24, again on the depth geometries.
+  string(REGEX MATCH "^DEPTH_STENCIL$" CHANNEL_ORDER_MATCH ${CHANNEL_ORDER})
+  string(REGEX MATCH "^UNORM_INT24$" CHANNEL_TYPE_MATCH ${CHANNEL_TYPE})
+  if(CHANNEL_TYPE_MATCH)
+    if(CHANNEL_ORDER_MATCH)
+      if(GEOMETRY_MATCH)
+        set (${VALID} TRUE PARENT_SCOPE)
+      endif()
+    endif()
+  endif()
+
+endfunction()
diff --git a/cmake/image_import_export.cmake b/cmake/image_import_export.cmake
new file mode 100644
index 0000000..0d64267
--- /dev/null
+++ b/cmake/image_import_export.cmake
@@ -0,0 +1,34 @@
+## Target executable name.
+set (TARGET hsa_image_import_export)
+
+## Specify the SRC_DIR.
+set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/extensions/images/import_export")
+
+## Included source files.
+set (SOURCE_FILES hsa_image_import_export.c test_image_import_export.c) + +include (image_data) + +## Test list +foreach (CHANNEL_TYPE ${CHANNEL_TYPES}) + + foreach (CHANNEL_ORDER ${CHANNEL_ORDERS}) + + foreach (GEOMETRY ${GEOMETRIES}) + + valid_image(${CHANNEL_TYPE} ${CHANNEL_ORDER} ${GEOMETRY} VALID) + + if(${VALID} MATCHES TRUE) + + set (TEST_LIST ${TEST_LIST} image_import_export_${CHANNEL_TYPE}_${CHANNEL_ORDER}_${GEOMETRY}) + + endif() + + endforeach() + + endforeach() + +endforeach() + +include (build) +include (test) diff --git a/cmake/init.cmake b/cmake/init.cmake new file mode 100644 index 0000000..786aad2 --- /dev/null +++ b/cmake/init.cmake @@ -0,0 +1,14 @@ +## Target executable name. +set (TARGET hsa_init) + +## Specify the SRC_DIR. +set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/init") + +## Included source files. +set (SOURCE_FILES hsa_init.c test_concurrent_init.c test_concurrent_init_shutdown.c test_concurrent_shutdown.c test_refcount.c test_reinitialize.c) + +## Test list. +set (TEST_LIST concurrent_init concurrent_shutdown concurrent_init_shutdown refcount reinitialize) + +include (build) +include (test) diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake new file mode 100644 index 0000000..24e2763 --- /dev/null +++ b/cmake/kernel.cmake @@ -0,0 +1,18 @@ +## Specify the BRIG_DIR. +set (BRIG_DIR "${CMAKE_SOURCE_DIR}/src/kernels") + +## Included source files. +set (BRIG_FILES agent_dispatch.brig depend_module1.brig depend_module2.brig global_vector_copy.brig global_agent_vector_copy.brig group_memory.brig init_data.brig memory_ops.brig mixed_scope.brig module_scope.brig no_op2.brig no_op.brig no_op_small.brig private_memory.brig program_scope.brig readonly_vector_copy.brig signal_operations.brig vector_copy.brig verify_image_region.brig kernarg_align.brig recursive_func.brig control_device.brig) + +add_custom_target(copy-brig-files ALL) + +foreach (BRIG_FILE ${BRIG_FILES}) + + ## Copy to the binary directory to support `make test`. 
+ add_custom_command (TARGET copy-brig-files COMMAND ${CMAKE_COMMAND} -E copy ${BRIG_DIR}/${BRIG_FILE} ${CMAKE_BINARY_DIR}) + + ## Add support for the `make install` command. + install (FILES ${BRIG_DIR}/${BRIG_FILE} DESTINATION ${INSTALL_DIR}) + +endforeach(BRIG_FILE) + diff --git a/cmake/memory.cmake b/cmake/memory.cmake new file mode 100644 index 0000000..62f7368 --- /dev/null +++ b/cmake/memory.cmake @@ -0,0 +1,14 @@ +## Target executable name. +set (TARGET hsa_memory) + +## Specify the SRC_DIR. +set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/memory") + +## Included source files. +set (SOURCE_FILES hsa_memory.c test_helper_func.c test_memory_allocated_vector_copy_heap.c test_memory_allocated_vector_copy_stack.c test_memory_allocate_max_size.c test_memory_allocate_zero_size.c test_memory_basic_allocate_free.c test_memory_basic_register_deregister.c test_memory_coherence_after_register.c test_memory_concurrent_allocate.c test_memory_concurrent_deregister.c test_memory_concurrent_free.c test_memory_concurrent_register.c test_memory_copy_allocated_to_allocated.c test_memory_copy_allocated_to_registered.c test_memory_copy_registered_to_allocated.c test_memory_copy_registered_to_registered.c test_memory_device.c test_memory_group_dynamic_allocation.c test_memory_minimum_region.c test_memory_region_concurrent_get_info.c test_memory_region_alignment.c test_memory_register_subrange.c test_memory_vector_copy_between_stack_and_heap.c test_memory_vector_copy_heap_not_registered.c test_memory_vector_copy_heap_registered.c test_memory_vector_copy_stack_not_registered.c test_memory_vector_copy_stack_registered.c test_memory_copy_system_and_global.c test_memory_assign_agent.c) + +## Test list. 
+set (TEST_LIST memory_allocated_vector_copy_heap memory_allocated_vector_copy_stack memory_allocate_max_size memory_allocate_zero_size memory_assign_agent memory_basic_allocate_free memory_basic_register_deregister memory_coherence_after_register memory_concurrent_allocate memory_concurrent_deregister memory_concurrent_free memory_concurrent_register memory_copy_allocated_to_allocated memory_copy_allocated_to_registered memory_copy_registered_to_allocated memory_copy_registered_to_registered memory_copy_system_and_global memory_group_dynamic_allocation memory_minimum_region memory_region_concurrent_get_info memory_region_alignment memory_register_subrange memory_vector_copy_between_stack_and_heap memory_vector_copy_heap_not_registered memory_vector_copy_heap_registered memory_vector_copy_stack_not_registered memory_vector_copy_stack_registered) + +include (build) +include (test) diff --git a/cmake/queue.cmake b/cmake/queue.cmake new file mode 100644 index 0000000..f3e8483 --- /dev/null +++ b/cmake/queue.cmake @@ -0,0 +1,14 @@ +## Target executable name. +set (TARGET hsa_queue) + +## Specify the SRC_DIR. +set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/queue") + +## Source files. +set (SOURCE_FILES hsa_queue.c test_queue_create_concurrent.c test_queue_create_parameters.c test_queue_callback.c test_queue_destroy_concurrent.c test_queue_full.c test_queue_dispatch_concurrent.c test_queue_inactivate.c test_queue_size_create.c test_queue_multi_gap.c test_queue_write_index_add_acq_rel_ordering.c test_queue_write_index_add_acquire_release_ordering.c test_queue_write_index_add_atomic.c test_queue_write_index_cas_acq_rel_ordering.c test_queue_write_index_cas_acquire_release_ordering.c test_queue_write_index_cas_atomic.c test_queue_write_index_load_store_atomic.c test_queue_multiple_queues.c test_queue_multiple_dispatch.c) + +## Test list. 
+set (TEST_LIST queue_create_parameters queue_callback queue_destroy_concurrent queue_dispatch_concurrent queue_full queue_multiple_dispatch queue_inactivate queue_size_create queue_multiple_queues queue_multi_gap queue_write_index_add_acq_rel_ordering queue_write_index_add_acquire_release_ordering queue_write_index_add_atomic queue_write_index_cas_acq_rel_ordering queue_write_index_cas_acquire_release_ordering queue_write_index_cas_atomic) + +include (build) +include (test) diff --git a/cmake/script.cmake b/cmake/script.cmake new file mode 100644 index 0000000..6726832 --- /dev/null +++ b/cmake/script.cmake @@ -0,0 +1,10 @@ +## Specify the SCRIPT_DIR. +set (SCRIPT_DIR "${CMAKE_SOURCE_DIR}/script") + +## Install the run.sh script in the build directory for `make test` support +configure_file(${SCRIPT_DIR}/run.sh run.sh COPYONLY) +## Install the execute.sh script in the build directory for direct execution +configure_file(${SCRIPT_DIR}/execute.sh execute.sh COPYONLY) + +## Install the execute.sh script for traditional execute support +install(PROGRAMS ${SCRIPT_DIR}/execute.sh DESTINATION ${INSTALL_DIR}) diff --git a/cmake/signals.cmake b/cmake/signals.cmake new file mode 100644 index 0000000..66894c2 --- /dev/null +++ b/cmake/signals.cmake @@ -0,0 +1,14 @@ +## Target executable name. +set (TARGET hsa_signals) + +## Specify the SRC_DIR. +set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/core/signals") + +## Included source files. 
+set (SOURCE_FILES hsa_signals.c test_signal_create_concurrent.c test_signal_create_initial_value.c test_signal_create_max_consumers.c test_signal_create_one_consumers.c test_signal_create_zero_consumers.c test_signal_destroy_concurrent.c test_signal_kernel_multi_set.c test_signal_kernel_multi_wait.c test_signal_kernel_set.c test_signal_kernel_wait.c test_signal_wait_add.c test_signal_wait_and.c test_signal_wait_cas.c test_signal_wait_exchange.c test_signal_wait_or.c test_signal_wait_store.c test_signal_wait_subtract.c test_signal_wait_xor.c test_signal_store_release_load_acquire_ordering.c test_signal_store_release_load_acquire_ordering_transitive.c test_signal_load_store_atomic.c test_signal_add_acq_rel_ordering.c test_signal_add_acq_rel_ordering_transitive.c test_signal_add_acquire_release_ordering.c test_signal_add_acquire_release_ordering_transitive.c test_signal_add_atomic.c test_signal_and_acq_rel_ordering.c test_signal_and_acq_rel_ordering_transitive.c test_signal_and_acquire_release_ordering.c test_signal_and_acquire_release_ordering_transitive.c test_signal_and_atomic.c test_signal_cas_acq_rel_ordering.c test_signal_cas_acq_rel_ordering_transitive.c test_signal_cas_acquire_release_ordering.c test_signal_cas_acquire_release_ordering_transitive.c test_signal_cas_atomic.c test_signal_exchange_acq_rel_ordering.c test_signal_exchange_acq_rel_ordering_transitive.c test_signal_exchange_acquire_release_ordering.c test_signal_exchange_acquire_release_ordering_transitive.c test_signal_exchange_atomic.c test_signal_or_acq_rel_ordering.c test_signal_or_acq_rel_ordering_transitive.c test_signal_or_acquire_release_ordering.c test_signal_or_acquire_release_ordering_transitive.c test_signal_or_atomic.c test_signal_subtract_acq_rel_ordering.c test_signal_subtract_acq_rel_ordering_transitive.c test_signal_subtract_acquire_release_ordering_transitive.c test_signal_subtract_atomic.c test_signal_xor_acq_rel_ordering.c test_signal_xor_acq_rel_ordering_transitive.c 
test_signal_xor_acquire_release_ordering.c test_signal_xor_acquire_release_ordering_transitive.c test_signal_xor_atomic.c test_signal_wait_conditions.c test_signal_wait_satisfied_conditions.c test_signal_wait_expectancy.c test_signal_wait_utils.c test_signal_wait_timeout.c) + +## Test list. +set (TEST_LIST signal_create_concurrent signal_create_initial_value signal_create_max_consumers signal_create_one_consumers signal_create_zero_consumers signal_destroy_concurrent signal_kernel_multi_set signal_kernel_multi_wait signal_kernel_set signal_kernel_wait signal_wait_acquire_timeout signal_wait_acquire_add signal_wait_acquire_and signal_wait_acquire_or signal_wait_acquire_subtract signal_wait_acquire_xor signal_wait_relaxed_timeout signal_wait_relaxed_add signal_wait_relaxed_and signal_wait_relaxed_or signal_wait_relaxed_subtract signal_wait_relaxed_xor signal_wait_conditions signal_wait_expectancy signal_wait_satisfied_conditions signal_wait_store_release signal_wait_store_relaxed signal_store_release_load_acquire_ordering signal_store_release_load_acquire_ordering_transitive signal_load_store_atomic signal_add_acq_rel_ordering signal_add_acq_rel_ordering_transitive signal_add_acquire_release_ordering signal_add_acquire_release_ordering_transitive signal_add_atomic_acq_rel signal_add_atomic_acquire signal_add_atomic_release signal_add_atomic_relaxed signal_and_acq_rel_ordering signal_and_acq_rel_ordering_transitive signal_and_acquire_release_ordering signal_and_acquire_release_ordering_transitive signal_and_atomic_acq_rel signal_and_atomic_acquire signal_and_atomic_release signal_and_atomic_relaxed signal_cas_acq_rel_ordering signal_cas_acquire_release_ordering signal_cas_atomic_acq_rel signal_cas_atomic_acquire signal_cas_atomic_release signal_cas_atomic_relaxed signal_exchange_acq_rel_ordering signal_exchange_acquire_release_ordering signal_exchange_acquire_release_ordering_transitive signal_exchange_atomic_acq_rel signal_exchange_atomic_acquire 
signal_exchange_atomic_release signal_exchange_atomic_relaxed signal_or_acq_rel_ordering signal_or_acq_rel_ordering_transitive signal_or_acquire_release_ordering signal_or_acquire_release_ordering_transitive signal_or_atomic_acq_rel signal_or_atomic_acquire signal_or_atomic_release signal_or_atomic_relaxed signal_subtract_acq_rel_ordering signal_subtract_acq_rel_ordering_transitive signal_subtract_acquire_release_ordering_transitive signal_subtract_atomic_acq_rel signal_subtract_atomic_acquire signal_subtract_atomic_release signal_subtract_atomic_relaxed signal_xor_acq_rel_ordering signal_xor_acq_rel_ordering_transitive signal_xor_acquire_release_ordering signal_xor_acquire_release_ordering_transitive signal_xor_atomic_acq_rel signal_xor_atomic_acquire signal_xor_atomic_release signal_xor_atomic_relaxed) + +include (build) +include (test) diff --git a/cmake/test.cmake b/cmake/test.cmake new file mode 100644 index 0000000..c69f733 --- /dev/null +++ b/cmake/test.cmake @@ -0,0 +1,22 @@ +## Add tests to the test list + +set (COMMAND_STRING "") + +foreach (TEST ${TEST_LIST}) + + string (STRIP ${TEST} TEST) + + if (TEST) + + add_test (NAME ${TEST} WORKING_DIRECTORY ${CMAKE_BINARY_DIR} COMMAND ${EXECUTION_SCRIPT} ${TARGET} ${TEST}) + + file (APPEND ${CMAKE_BINARY_DIR}/test.lst "${TEST}:${TARGET}\n") + + endif () + +endforeach () + +## Install the test.lst file to support traditional execution +install(FILES ${CMAKE_BINARY_DIR}/test.lst DESTINATION ${INSTALL_DIR}) + +set (TEST_LIST "") diff --git a/cmake/utils.cmake b/cmake/utils.cmake new file mode 100644 index 0000000..95618b0 --- /dev/null +++ b/cmake/utils.cmake @@ -0,0 +1,11 @@ +## Test utilities library name. +set (TARGET ${UTILS_LIBRARY}) + +## Specify the SRC_DIR. +set (SRC_DIR "${CMAKE_SOURCE_DIR}/src/utils") + +## Included source files. +set (SOURCE_FILES agent_utils.c concurrent_utils.c dispatch_utils.c finalize_utils.c image_utils.c queue_utils.c) + +## Library build directives. 
+include(buildlib)
diff --git a/script/execute.sh b/script/execute.sh
new file mode 100755
index 0000000..a8f8bf6
--- /dev/null
+++ b/script/execute.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+################################################################################
+##
+## =============================================================================
+## HSA Runtime Conformance Release License
+## =============================================================================
+## The University of Illinois/NCSA
+## Open Source License (NCSA)
+##
+## Copyright (c) 2014, Advanced Micro Devices, Inc.
+## All rights reserved.
+##
+## Developed by:
+##
+## AMD Research and AMD HSA Software Development
+##
+## Advanced Micro Devices, Inc.
+##
+## www.amd.com
+##
+## Permission is hereby granted, free of charge, to any person obtaining a copy
+## of this software and associated documentation files (the "Software"), to
+## deal with the Software without restriction, including without limitation
+## the rights to use, copy, modify, merge, publish, distribute, sublicense,
+## and/or sell copies of the Software, and to permit persons to whom the
+## Software is furnished to do so, subject to the following conditions:
+##
+## - Redistributions of source code must retain the above copyright notice,
+## this list of conditions and the following disclaimers.
+## - Redistributions in binary form must reproduce the above copyright
+## notice, this list of conditions and the following disclaimers in
+## the documentation and/or other materials provided with the distribution.
+## - Neither the names of ,
+## nor the names of its contributors may be used to endorse or promote
+## products derived from this Software without specific prior written
+## permission.
+##
+## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+## DEALINGS WITH THE SOFTWARE.
+##
+################################################################################
+## Runs every "<test case>:<test executable>" entry of the given test-set file, logs test
+## output to results.out, prints per-executable and overall counts, and exits 1 if any test failed.
+function usage() {
+  echo "execute.sh [test file]"
+}
+
+# Set the attribute variables
+TEST_SET_FILE=$1
+
+# Check the variables for correct values (quoted so a missing argument is rejected too)
+if [ ! -e "${TEST_SET_FILE}" ]; then
+  echo "The test set file does not exist"
+  usage
+  exit -1
+fi
+
+## Print the header
+START_TIME=$(date)
+MACHINE_NAME=$(uname --nodename)
+PROCESSOR_TYPE=$(uname --processor)
+OPERATING_SYSTEM=$(uname --operating-system)
+KERNEL_NAME=$(uname --kernel-name)
+KERNEL_VERSION=$(uname --kernel-version)
+
+echo "================================================================================"
+echo " HSA Runtime Conformance Log"
+echo "================================================================================"
+echo " Date: ${START_TIME}"
+echo " Machine: ${MACHINE_NAME}"
+echo " Processor: ${PROCESSOR_TYPE}"
+echo " Operating System: ${OPERATING_SYSTEM}"
+echo " Kernel: ${KERNEL_NAME} - ${KERNEL_VERSION}"
+echo "================================================================================"
+
+## Environment for the tests: CK_FORK=no, and $PWD on PATH so bare executable names resolve
+export CK_FORK=no
+export PATH=$PATH:$PWD
+
+# Test metrics
+TOTAL_FAILED=0
+TOTAL_PASSED=0
+TOTAL_ERROR=0
+TOTAL_NA=0
+TOTAL_TOTAL=0
+GROUP_FAILED=0
+GROUP_PASSED=0
+GROUP_ERROR=0
+GROUP_NA=0
+GROUP_TOTAL=0
+TEST_GROUP=""
+TEST_FAILURES=()
+: > results.out  # start this run with an empty log; every test appends to it below
+while read -r LINE; do
+  IFS=":" read -r -a PARAMS <<< "${LINE}"  # PARAMS[0] = test case, PARAMS[1] = executable
+  [[ -n ${PARAMS[1]} ]] || continue  # skip blank or malformed lines
+  # Determine if a new test group is being used.
+  # If it is, start a new test group.
+  if [[ ${TEST_GROUP} != "${PARAMS[1]}" ]]; then
+    # Print the results of the previous group.
+    if [[ -n ${TEST_GROUP} ]]; then
+      echo " Passed: ${GROUP_PASSED} Failed: ${GROUP_FAILED} Error: ${GROUP_ERROR} Total: ${GROUP_TOTAL}"
+      echo ""
+
+      # Update the total metrics.
+      TOTAL_FAILED=$((TOTAL_FAILED + GROUP_FAILED))
+      TOTAL_PASSED=$((TOTAL_PASSED + GROUP_PASSED))
+      TOTAL_ERROR=$((TOTAL_ERROR + GROUP_ERROR))
+      TOTAL_NA=$((TOTAL_NA + GROUP_NA))
+      TOTAL_TOTAL=$((TOTAL_TOTAL + GROUP_TOTAL))
+
+      # Reset the group metrics.
+      GROUP_FAILED=0
+      GROUP_PASSED=0
+      GROUP_ERROR=0
+      GROUP_NA=0
+      GROUP_TOTAL=0
+    fi
+
+    # Set the test group to the new group.
+    TEST_GROUP=${PARAMS[1]}
+
+    # Print header for the new group.
+    GROUP_START_TIME=$(date)
+    echo "${TEST_GROUP} - ${GROUP_START_TIME}"
+  fi
+  # Wait on the test's own PID: a bare "wait" always returns 0 and would hide failures.
+  export CK_RUN_CASE=${PARAMS[0]}
+  echo "Running ${PARAMS[0]}" >> results.out
+  ${PARAMS[1]} >> results.out &
+  wait $!
+  rc=$?
+
+  if [ $rc -ne 0 ]; then
+    GROUP_FAILED=$((GROUP_FAILED + 1))
+    TEST_FAILURES+=("${PARAMS[1]} - ${PARAMS[0]}")
+  else
+    GROUP_PASSED=$((GROUP_PASSED + 1))
+  fi
+
+  GROUP_TOTAL=$((GROUP_TOTAL + 1))
+
+done < "$TEST_SET_FILE"
+
+# Update the total metric for the final group
+TOTAL_FAILED=$((TOTAL_FAILED + GROUP_FAILED))
+TOTAL_PASSED=$((TOTAL_PASSED + GROUP_PASSED))
+TOTAL_ERROR=$((TOTAL_ERROR + GROUP_ERROR))
+TOTAL_NA=$((TOTAL_NA + GROUP_NA))
+TOTAL_TOTAL=$((TOTAL_TOTAL + GROUP_TOTAL))
+
+## Print the final groups results
+echo " Passed: ${GROUP_PASSED} Failed: ${GROUP_FAILED} Error: ${GROUP_ERROR} Total: ${GROUP_TOTAL}"
+echo ""
+
+## Print total results
+echo "================================================================================"
+echo "Testrun"
+echo " Passed: ${TOTAL_PASSED} Failed: ${TOTAL_FAILED} Error: ${TOTAL_ERROR} Total: ${TOTAL_TOTAL}"
+echo "================================================================================"
+echo ""
+
+## Print out any failures and exit
+
+if [ ${#TEST_FAILURES[@]} -ne 0 ]; then
+  echo "Failed tests:"
+
+  for TEST in "${TEST_FAILURES[@]}"; do
+    echo " ${TEST}"
+  done
+
+  exit 1
+else
+  exit 0
+fi
diff --git a/script/run.sh
b/script/run.sh
new file mode 100755
index 0000000..997aef3
--- /dev/null
+++ b/script/run.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+################################################################################
+##
+## =============================================================================
+## HSA Runtime Conformance Release License
+## =============================================================================
+## The University of Illinois/NCSA
+## Open Source License (NCSA)
+##
+## Copyright (c) 2014, Advanced Micro Devices, Inc.
+## All rights reserved.
+##
+## Developed by:
+##
+## AMD Research and AMD HSA Software Development
+##
+## Advanced Micro Devices, Inc.
+##
+## www.amd.com
+##
+## Permission is hereby granted, free of charge, to any person obtaining a copy
+## of this software and associated documentation files (the "Software"), to
+## deal with the Software without restriction, including without limitation
+## the rights to use, copy, modify, merge, publish, distribute, sublicense,
+## and/or sell copies of the Software, and to permit persons to whom the
+## Software is furnished to do so, subject to the following conditions:
+##
+## - Redistributions of source code must retain the above copyright notice,
+## this list of conditions and the following disclaimers.
+## - Redistributions in binary form must reproduce the above copyright
+## notice, this list of conditions and the following disclaimers in
+## the documentation and/or other materials provided with the distribution.
+## - Neither the names of ,
+## nor the names of its contributors may be used to endorse or promote
+## products derived from this Software without specific prior written
+## permission.
+##
+## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+## DEALINGS WITH THE SOFTWARE.
+##
+################################################################################
+## Usage: run.sh <test executable> <test case>. Runs ./<test executable> from the current directory with CK_RUN_CASE=<test case> and CK_DEFAULT_TIMEOUT=360 in its environment.
+
+CK_DEFAULT_TIMEOUT=360 CK_RUN_CASE="$2" "./$1"
diff --git a/src/core/agent/hsa_agent.c b/src/core/agent/hsa_agent.c
new file mode 100644
index 0000000..a68c04b
--- /dev/null
+++ b/src/core/agent/hsa_agent.c
@@ -0,0 +1,65 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include "hsa_agent.h" + +DEFINE_TEST(concurrent_iterate); +DEFINE_TEST(iterate_null_data); +DEFINE_TEST(iterate_terminate); +DEFINE_TEST(query_attributes); +DEFINE_TEST(concurrent_query); +DEFINE_TEST(query_system_attributes); + +int main(int argc, char* argv[]) { + INITIALIZE_TESTSUITE(); + ADD_TEST(concurrent_iterate); + ADD_TEST(iterate_null_data); + ADD_TEST(iterate_terminate); + ADD_TEST(query_attributes); + ADD_TEST(concurrent_query); + ADD_TEST(query_system_attributes); + RUN_TESTS(); +} diff --git a/src/core/agent/hsa_agent.h b/src/core/agent/hsa_agent.h new file mode 100644 index 0000000..c9057df --- /dev/null +++ b/src/core/agent/hsa_agent.h @@ -0,0 +1,54 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#ifndef _HSA_AGENT_H_ +#define _HSA_AGENT_H_ +extern int test_iterate_null_data(); +extern int test_iterate_terminate(); +extern int test_query_attributes(); +extern int test_concurrent_query(); +extern int test_concurrent_iterate(); +extern int test_query_system_attributes(); +#endif // _HSA_AGENT_H_ diff --git a/src/core/agent/test_concurrent_iterate.c b/src/core/agent/test_concurrent_iterate.c new file mode 100644 index 0000000..61be8d1 --- /dev/null +++ b/src/core/agent/test_concurrent_iterate.c @@ -0,0 +1,141 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: concurrent_iterate + * Scope: Conformance + * + * Purpose: Verifies that the hsa_iterate_agents API is thread safe. + * + * Test Description: + * 1) Use hsa_iterate_agents to obtain a list of agents on the system + * and cache the result. + * 2) Create several threads that concurrently, + * a) Call hsa_iterate_agents to obtain a new list of agents. + * b) Compare the list of agent handles to the original list. + * 3) Repeat this several times. + * + * Expected Results: The concurrently generated lists should match the + * initial list. 
+ */ + +#include +#include +#include +#include + +#define NUM_ITER 10 +#define NUM_TESTS 10 + +void wrapper_check_agent_list(void *data) { + struct agent_list_s *agent_list_orig = (struct agent_list_s *)data; + + // Get a new list of agents + struct agent_list_s agent_list_new; + get_agent_list(&agent_list_new); + + // Check if the number of agents in the new list is identical + // to the original list + ASSERT(agent_list_orig->num_agents == agent_list_new.num_agents); + + // Check agent handles to the original list + int ii; + for (ii = 0; ii < agent_list_orig->num_agents; ii++) + ASSERT(agent_list_orig->agents[ii].handle == + agent_list_new.agents[ii].handle); + + free_agent_list(&agent_list_new); + + return; +} + +int test_concurrent_iterate() { + int num_iter = NUM_ITER; + int num_threads = NUM_TESTS; + + // Repeat the test num_iter times + int ii; + for (ii = 0; ii < num_iter; ++ii) { + hsa_status_t status; + + // Init hsa runtime + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Get a list of agent + struct agent_list_s agent_list_orig; + get_agent_list(&agent_list_orig); + + // Create a test group + struct test_group *tg_agent = test_group_create(num_threads); + + // Add test functions with num_threads copies + test_group_add(tg_agent, &wrapper_check_agent_list, + &agent_list_orig, num_threads); + + // Create threads for the test group + test_group_thread_create(tg_agent); + + // Start tests + test_group_start(tg_agent); + + // Wait all tests finish + test_group_wait(tg_agent); + + // Exit all test threads + test_group_exit(tg_agent); + + // Clean up resources + test_group_destroy(tg_agent); + + free_agent_list(&agent_list_orig); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + return 0; +} diff --git a/src/core/agent/test_concurrent_query.c b/src/core/agent/test_concurrent_query.c new file mode 100644 index 0000000..202d43c --- /dev/null +++ b/src/core/agent/test_concurrent_query.c @@ 
-0,0 +1,382 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: concurrent_query + * Scope: Conformance + * + * Purpose: Verifies that the hsa_agent_get_info API is thread safe. + * + * Test Description: + * 1) Use hsa_iterate_agents to obtain the list of valid agents in the + * system. + * 2) For each agent, query all of the agent's attributes and cache + * those values. + * 3) For each attribute create several threads that, + * a) Concurrently query the attribute. + * b) Compares the attribute to the originally cached value. + * 4) Repeat this several times for each attribute. + * + * Expected Results: The concurrent queries should obtain the same results + * as the initial query. + */ + +#include +#include +#include +#include +#include +#include + +#define NUM_ITER 10 +#define NUM_TESTS 10 + +struct attribute_s { + hsa_agent_t agent; + hsa_agent_info_t attr; + size_t size_attr; + void *value; +}; + +void check_attribute(void *data) { + hsa_status_t status; + struct attribute_s *attr_orig = (struct attribute_s *)data; + void *attr_val = (void*) malloc(attr_orig->size_attr); + memset(attr_val, 0, attr_orig->size_attr); + // Query a new attribute + status = hsa_agent_get_info(attr_orig->agent, attr_orig->attr, attr_val); + ASSERT(status == HSA_STATUS_SUCCESS); + // Compare the new attribute to the given attribute + ASSERT(memcmp(attr_val, attr_orig->value, attr_orig->size_attr) == 0); + free(attr_val); + return; +} + +int check_attribute_concurr(hsa_agent_t agent, + hsa_agent_info_t attr, + size_t size_attr, void *value) { + struct attribute_s attr_orig; + // Fill attribute information + attr_orig.attr = attr; + attr_orig.agent = agent; + attr_orig.size_attr = size_attr; + attr_orig.value = value; + + // Create threads to 
query the attribute concurrently + struct test_group *tg_attr = test_group_create(NUM_TESTS); + test_group_add(tg_attr, &check_attribute, &attr_orig, NUM_TESTS); + test_group_thread_create(tg_attr); + test_group_start(tg_attr); + test_group_wait(tg_attr); + test_group_exit(tg_attr); + test_group_destroy(tg_attr); + + return 0; +} + +void check_attributes_concurr(hsa_agent_t agent) { + hsa_status_t status; + int ii; + for (ii = 0; ii < NUM_ITER; ++ii) { + char name[64]; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name); + check_attribute_concurr(agent, HSA_AGENT_INFO_NAME, sizeof(name), name); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + char vendor_name[64]; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, + vendor_name); + + check_attribute_concurr(agent, HSA_AGENT_INFO_VENDOR_NAME, + sizeof(vendor_name), vendor_name); + } + + uint32_t feature = 0; + for (ii = 0; ii < NUM_ITER; ++ii) { + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &feature); + check_attribute_concurr(agent, HSA_AGENT_INFO_FEATURE, + sizeof(feature), &feature); + } + + if (HSA_AGENT_FEATURE_KERNEL_DISPATCH == feature) { + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t wavefront_size = 0; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, + &wavefront_size); + check_attribute_concurr(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, + sizeof(wavefront_size), &wavefront_size); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t workgroup_max_size; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, + &workgroup_max_size); + check_attribute_concurr(agent, + HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, + sizeof(workgroup_max_size), + &workgroup_max_size); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint16_t workgroup_max_dim[3]; + workgroup_max_dim[0] = 0; + workgroup_max_dim[1] = 0; + workgroup_max_dim[2] = 0; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_WORKGROUP_MAX_DIM, + workgroup_max_dim); + 
check_attribute_concurr(agent, + HSA_AGENT_INFO_WORKGROUP_MAX_DIM, + 3 * sizeof(uint16_t), + workgroup_max_dim); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t grid_max_size; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_GRID_MAX_SIZE, + &grid_max_size); + check_attribute_concurr(agent, + HSA_AGENT_INFO_GRID_MAX_SIZE, + sizeof(grid_max_size), + &grid_max_size); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + hsa_dim3_t grid_max_dim; + grid_max_dim.x = 0; + grid_max_dim.y = 0; + grid_max_dim.z = 0; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_GRID_MAX_DIM, + &grid_max_dim); + check_attribute_concurr(agent, + HSA_AGENT_INFO_GRID_MAX_DIM, + sizeof(hsa_dim3_t), + &grid_max_dim); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t fbarriers_max_size; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_FBARRIER_MAX_SIZE, + &fbarriers_max_size); + check_attribute_concurr(agent, + HSA_AGENT_INFO_FBARRIER_MAX_SIZE, + sizeof(fbarriers_max_size), + &fbarriers_max_size); + } + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t queues_max; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_QUEUES_MAX, + &queues_max); + check_attribute_concurr(agent, + HSA_AGENT_INFO_QUEUES_MAX, + sizeof(queues_max), + &queues_max); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t queue_max_size; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_QUEUE_MAX_SIZE, + &queue_max_size); + check_attribute_concurr(agent, + HSA_AGENT_INFO_QUEUE_MAX_SIZE, + sizeof(queue_max_size), + &queue_max_size); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t node; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &node); + check_attribute_concurr(agent, + HSA_AGENT_INFO_NODE, + sizeof(node), + &node); + } + + hsa_device_type_t device; + for (ii = 0; ii < NUM_ITER; ++ii) { + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device); + check_attribute_concurr(agent, + HSA_AGENT_INFO_DEVICE, + sizeof(device), + &device); + } + + for (ii = 0; ii < 
NUM_ITER; ++ii) { + uint32_t cache_size[4]; + cache_size[0] = 0; + cache_size[1] = 0; + cache_size[2] = 0; + cache_size[3] = 0; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_CACHE_SIZE, + cache_size); + check_attribute_concurr(agent, + HSA_AGENT_INFO_CACHE_SIZE, + 4 * sizeof(uint32_t), + cache_size); + } + + if (device == HSA_DEVICE_TYPE_GPU) { + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t image1d_max_elems; + status = + hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS, + &image1d_max_elems); + check_attribute_concurr(agent, + HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS, + sizeof(uint32_t), &image1d_max_elems); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t image2d_max_elems[2]; + status = + hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS, + image2d_max_elems); + check_attribute_concurr(agent, + HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS, + 2 * sizeof(uint32_t), + image2d_max_elems); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t image3d_max_elems[3]; + status = + hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS, + &image3d_max_elems); + check_attribute_concurr(agent, + HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS, + 3 * sizeof(uint32_t), + image3d_max_elems); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t image_array_max_layers; + status = + hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS, + &image_array_max_layers); + check_attribute_concurr(agent, + HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS, + sizeof(uint32_t), + &image_array_max_layers); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t image_rd_max; + status = + hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES, + &image_rd_max); + check_attribute_concurr(agent, + HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES, + sizeof(uint32_t), + &image_rd_max); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t image_rorw_max; + status = + hsa_agent_get_info(agent, + 
HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES, + &image_rorw_max); + check_attribute_concurr(agent, + HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES, + sizeof(uint32_t), + &image_rorw_max); + } + + for (ii = 0; ii < NUM_ITER; ++ii) { + uint32_t sampler_max; + status = + hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS, + &sampler_max); + check_attribute_concurr(agent, + HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS, + sizeof(uint32_t), + &sampler_max); + } + } + + return; +} + +hsa_status_t check_agents_concurr(hsa_agent_t agent, void* data) { + hsa_agent_t *ret = (hsa_agent_t *)data; + *ret = agent; + + // Check attributes of the agent + check_attributes_concurr(agent); + + // Keep iterating + return HSA_STATUS_SUCCESS; +} + +int test_concurrent_query() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + hsa_agent_t agent; + status = hsa_iterate_agents(check_agents_concurr, &agent); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/agent/test_iterate_null_data.c b/src/core/agent/test_iterate_null_data.c new file mode 100644 index 0000000..60d2c62 --- /dev/null +++ b/src/core/agent/test_iterate_null_data.c @@ -0,0 +1,82 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: iterate_null_data + * Scope: Conformance + * + * Purpose: Verifies that passing a NULL data value to hsa_iterate_agents + * doesn't cause undefined behavior. + * + * Test Description: + * 1) Call hsa_iterate_agents with a valid callback but a NULL data + * value. + * + * Expected Results: The callback should run properly and the runtime + * shouldn't exhibit undefined behavior. 
+ * + */ + +#include +#include +#include + +int test_iterate_null_data() { + hsa_status_t status; + + // Initialize hsa_runtime + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Call hsa_iterate_agents with a valid callback, but a NULL data + status = hsa_iterate_agents(check_agent, NULL); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Shutdown hsa_runtime + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/agent/test_iterate_terminate.c b/src/core/agent/test_iterate_terminate.c new file mode 100644 index 0000000..cba0d1d --- /dev/null +++ b/src/core/agent/test_iterate_terminate.c @@ -0,0 +1,93 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: iterate_terminate + * Scope: Conformance + * + * Purpose: Verifies that if the callback function passed to hsa_iterate_agents + * returns a status code other than HSA_STATUS_SUCCESS, iteration terminates. + * + * Test Description: + * 1) Call the hsa_iterate_agents API using a callback that returns a valid + * status code other than HSA_STATUS_SUCCESS. + * 2) Count the number of times the callback is invoked. This can be done + * using the data parameter of the callback function. Note that this implies + * that invocation of each callback isn't concurrent (otherwise iteration + * wouldn't terminate). + * + * Expected Results: The callback should be invoked only once, regardless of + * the number of agents. 
+ */ + +#include +#include +#include + +hsa_status_t test_callback(hsa_agent_t agent, void *data) { + (*(unsigned int *)data)++; + return HSA_STATUS_INFO_BREAK; +} + +int test_iterate_terminate() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + unsigned int count = 0; + + status = hsa_iterate_agents(get_num_agents, &count); + ASSERT(status == HSA_STATUS_SUCCESS && 1 <= count); + + count = 0; + + status = hsa_iterate_agents(test_callback, &count); + ASSERT(status == HSA_STATUS_INFO_BREAK && 1 == count); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/agent/test_query_attributes.c b/src/core/agent/test_query_attributes.c new file mode 100644 index 0000000..dccadd6 --- /dev/null +++ b/src/core/agent/test_query_attributes.c @@ -0,0 +1,86 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: query_attributes + * Scope: Conformance + * + * Purpose: Verifies that the agent list can be traversed using the + * hsa_iterate_agents API, and that every agent attribute can be queried + * using the hsa_agent_get_info API. + * + * Test Description: + * 1) Call hsa_iterate_agents with a callback that does the following for each + * agent, + * a) Query all defined attributes using the hsa_agent_get_info API. + * b) Check each of the queried attributes for known constraints. + * 2) Count the number of agents available. + * + * Expected Results: Each attribute should have a reasonable set of + * values. There should be at least 2 HSA agents, one CPU and one + * COMPONENT.
+ */ + +#include +#include +#include + +int test_query_attributes() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Call hsa_iterate_agents traversing all agents and check attributes of + // each agent + status = hsa_iterate_agents(check_agent, NULL); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/agent/test_query_system_attributes.c b/src/core/agent/test_query_system_attributes.c new file mode 100644 index 0000000..6df5e84 --- /dev/null +++ b/src/core/agent/test_query_system_attributes.c @@ -0,0 +1,80 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: query_system_attributes + * Scope: Conformance + * + * Purpose: Verifies that all system attributes can be queried using the + * hsa_system_get_info API and that all system attributes satisfy the + * defined constraints. + * + * Test Description: + * 1) Call hsa_system_get_info to query all of the hsa_system_info_t + * attributes defined in the spec. + * 2) Check that each attribute satisfies its specific constraints. + * + * Expected Results: All attributes that are defined for the system should be + * valid + * and satisfy the defined constraints. 
+ */ + + +#include +#include +#include + +int test_query_system_attributes() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + check_system_info(); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/api/hsa_api.c b/src/core/api/hsa_api.c new file mode 100644 index 0000000..448615e --- /dev/null +++ b/src/core/api/hsa_api.c @@ -0,0 +1,367 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include "hsa_api.h" + +DEFINE_TEST(hsa_init); +DEFINE_TEST(hsa_init_MAX); +DEFINE_TEST(hsa_shut_down); +DEFINE_TEST(hsa_shut_down_not_initialized); +DEFINE_TEST(hsa_shut_down_after_shut_down); +DEFINE_TEST(hsa_status_string); +DEFINE_TEST(hsa_status_string_not_initialized); +DEFINE_TEST(hsa_status_string_invalid_status); +DEFINE_TEST(hsa_status_string_invalid_ptr); +DEFINE_TEST(hsa_iterate_agents); +DEFINE_TEST(hsa_iterate_agents_not_initialized); +DEFINE_TEST(hsa_iterate_agents_invalid_callback); +DEFINE_TEST(hsa_agent_get_info); +DEFINE_TEST(hsa_agent_get_info_not_initialized); +DEFINE_TEST(hsa_agent_get_info_invalid_agent); +DEFINE_TEST(hsa_agent_get_info_invalid_attribute); +DEFINE_TEST(hsa_agent_get_info_invalid_ptr); +DEFINE_TEST(hsa_agent_extension_supported); +DEFINE_TEST(hsa_agent_extension_supported_not_initialized); +DEFINE_TEST(hsa_agent_extension_supported_invalid_agent); +DEFINE_TEST(hsa_agent_extension_supported_invalid_extension); +DEFINE_TEST(hsa_agent_extension_supported_null_result_ptr); +DEFINE_TEST(hsa_agent_get_exception_policies); +DEFINE_TEST(hsa_agent_get_exception_policies_not_initialized); +DEFINE_TEST(hsa_agent_get_exception_policies_invalid_agent); +DEFINE_TEST(hsa_agent_get_exception_policies_null_mask_ptr); +DEFINE_TEST(hsa_agent_get_exception_policies_invalid_profile); +DEFINE_TEST(hsa_system_extension_supported); +DEFINE_TEST(hsa_system_extension_supported_not_initialized); 
/*
 * Test definitions (continued). Every name passed to DEFINE_TEST() below has
 * a same-named ADD_TEST() registration in main() and a matching
 * `extern int test_<name>()` declaration in hsa_api.h.
 * NOTE(review): the DEFINE_TEST macro itself is defined elsewhere; its exact
 * expansion is not visible from this file section.
 */
DEFINE_TEST(hsa_system_extension_supported_invalid_extension);
DEFINE_TEST(hsa_system_extension_supported_null_result_ptr);
DEFINE_TEST(hsa_system_get_extension_table);
DEFINE_TEST(hsa_system_get_extension_table_not_initialized);
DEFINE_TEST(hsa_system_get_extension_table_invalid_extension);
DEFINE_TEST(hsa_system_get_extension_table_null_table_ptr);
DEFINE_TEST(hsa_system_get_info);
DEFINE_TEST(hsa_system_get_info_not_initialized);
DEFINE_TEST(hsa_system_get_info_invalid_attribute);
DEFINE_TEST(hsa_system_get_info_invalid_ptr);
DEFINE_TEST(hsa_signal_create);
DEFINE_TEST(hsa_signal_create_not_initialized);
DEFINE_TEST(hsa_signal_create_null_signal);
DEFINE_TEST(hsa_signal_create_invalid_arg);
DEFINE_TEST(hsa_signal_destroy);
DEFINE_TEST(hsa_signal_destroy_not_initialized);
DEFINE_TEST(hsa_signal_destroy_invalid_arg);
DEFINE_TEST(hsa_signal_destroy_invalid_signal);
DEFINE_TEST(hsa_signal_load_acquire);
DEFINE_TEST(hsa_signal_load_relaxed);
DEFINE_TEST(hsa_signal_store_release);
DEFINE_TEST(hsa_signal_store_relaxed);
DEFINE_TEST(hsa_signal_exchange_acq_rel);
DEFINE_TEST(hsa_signal_exchange_acquire);
DEFINE_TEST(hsa_signal_exchange_relaxed);
DEFINE_TEST(hsa_signal_exchange_release);
DEFINE_TEST(hsa_signal_cas_acq_rel);
DEFINE_TEST(hsa_signal_cas_acquire);
DEFINE_TEST(hsa_signal_cas_relaxed);
DEFINE_TEST(hsa_signal_cas_release);
DEFINE_TEST(hsa_signal_add_acq_rel);
DEFINE_TEST(hsa_signal_add_acquire);
DEFINE_TEST(hsa_signal_add_relaxed);
DEFINE_TEST(hsa_signal_add_release);
DEFINE_TEST(hsa_signal_subtract_acq_rel);
DEFINE_TEST(hsa_signal_subtract_acquire);
DEFINE_TEST(hsa_signal_subtract_relaxed);
DEFINE_TEST(hsa_signal_subtract_release);
DEFINE_TEST(hsa_signal_and_acq_rel);
DEFINE_TEST(hsa_signal_and_acquire);
DEFINE_TEST(hsa_signal_and_relaxed);
DEFINE_TEST(hsa_signal_and_release);
DEFINE_TEST(hsa_signal_or_acq_rel);
DEFINE_TEST(hsa_signal_or_acquire);
DEFINE_TEST(hsa_signal_or_relaxed);
DEFINE_TEST(hsa_signal_or_release);
DEFINE_TEST(hsa_signal_xor_acq_rel);
DEFINE_TEST(hsa_signal_xor_acquire);
DEFINE_TEST(hsa_signal_xor_relaxed);
DEFINE_TEST(hsa_signal_xor_release);
DEFINE_TEST(hsa_queue_create);
DEFINE_TEST(hsa_queue_create_not_initialized);
DEFINE_TEST(hsa_queue_create_out_of_resources);
DEFINE_TEST(hsa_queue_create_invalid_agent);
DEFINE_TEST(hsa_queue_create_invalid_queue_creation);
DEFINE_TEST(hsa_queue_create_invalid_argument);
DEFINE_TEST(hsa_queue_destroy);
DEFINE_TEST(hsa_queue_destroy_not_initialized);
DEFINE_TEST(hsa_queue_destroy_invalid_queue);
DEFINE_TEST(hsa_queue_destroy_invalid_argument);
DEFINE_TEST(hsa_queue_inactivate);
DEFINE_TEST(hsa_queue_inactivate_not_initialized);
DEFINE_TEST(hsa_queue_inactivate_invalid_queue);
DEFINE_TEST(hsa_queue_inactivate_invalid_argument);
DEFINE_TEST(hsa_queue_load_read_index_acquire);
DEFINE_TEST(hsa_queue_load_read_index_relaxed);
DEFINE_TEST(hsa_queue_load_store_write_index_acquire_relaxed);
DEFINE_TEST(hsa_queue_load_store_write_index_relaxed_release);
DEFINE_TEST(hsa_queue_cas_write_index_acq_rel);
DEFINE_TEST(hsa_queue_cas_write_index_acquire);
DEFINE_TEST(hsa_queue_cas_write_index_relaxed);
DEFINE_TEST(hsa_queue_cas_write_index_release);
DEFINE_TEST(hsa_queue_add_write_index_acq_rel);
DEFINE_TEST(hsa_queue_add_write_index_acquire);
DEFINE_TEST(hsa_queue_add_write_index_relaxed);
DEFINE_TEST(hsa_queue_add_write_index_release);
DEFINE_TEST(hsa_memory_allocate);
DEFINE_TEST(hsa_memory_allocate_not_initialized);
DEFINE_TEST(hsa_memory_allocate_null_ptr);
DEFINE_TEST(hsa_memory_allocate_zero_size);
DEFINE_TEST(hsa_memory_allocate_invalid_allocation);
DEFINE_TEST(hsa_memory_allocate_invalid_region);
DEFINE_TEST(hsa_memory_free);
DEFINE_TEST(hsa_memory_free_not_initialized);
DEFINE_TEST(hsa_memory_register);
DEFINE_TEST(hsa_memory_register_not_initialized);
DEFINE_TEST(hsa_memory_register_invalid_argument);
DEFINE_TEST(hsa_memory_deregister);
DEFINE_TEST(hsa_memory_deregister_not_initialized);
DEFINE_TEST(hsa_region_get_info);
DEFINE_TEST(hsa_region_get_info_not_initialized);
DEFINE_TEST(hsa_region_get_info_invalid_region);
DEFINE_TEST(hsa_region_get_info_invalid_argument);
DEFINE_TEST(hsa_agent_iterate_regions);
DEFINE_TEST(hsa_agent_iterate_regions_not_initialized);
DEFINE_TEST(hsa_agent_iterate_regions_invalid_argument);
DEFINE_TEST(hsa_agent_iterate_regions_invalid_agent);
DEFINE_TEST(hsa_isa_get_info);
DEFINE_TEST(hsa_isa_get_info_not_initialized);
DEFINE_TEST(hsa_isa_get_info_invalid_isa);
DEFINE_TEST(hsa_isa_get_info_index_out_of_range);
DEFINE_TEST(hsa_isa_get_info_invalid_attribute);
DEFINE_TEST(hsa_isa_get_info_invalid_null_value);
DEFINE_TEST(hsa_code_object_get_info);
DEFINE_TEST(hsa_code_symbol_get_info);
DEFINE_TEST(hsa_executable_create);
DEFINE_TEST(hsa_executable_create_not_initialized);
DEFINE_TEST(hsa_executable_create_invalid_argument);
DEFINE_TEST(hsa_executable_create_out_of_resources);
DEFINE_TEST(hsa_executable_destroy);
DEFINE_TEST(hsa_executable_destroy_not_initialized);
DEFINE_TEST(hsa_executable_destroy_invalid_executable);
DEFINE_TEST(hsa_executable_load_code_object);
DEFINE_TEST(hsa_executable_load_code_object_not_initialized);
DEFINE_TEST(hsa_executable_load_code_object_invalid_executable);
DEFINE_TEST(hsa_executable_load_code_object_invalid_agent);
DEFINE_TEST(hsa_executable_load_code_object_invalid_code_object);
DEFINE_TEST(hsa_executable_load_code_object_frozen_executable);
DEFINE_TEST(hsa_executable_get_info);
DEFINE_TEST(hsa_executable_symbol_get_info);
DEFINE_TEST(hsa_soft_queue_create);
DEFINE_TEST(hsa_isa_from_name);
DEFINE_TEST(hsa_isa_from_name_null_name);
DEFINE_TEST(hsa_isa_from_name_null_isa);
DEFINE_TEST(hsa_isa_from_name_invalid_isa_name);
DEFINE_TEST(hsa_isa_compatible);
DEFINE_TEST(hsa_isa_compatible_invalid_isa);
DEFINE_TEST(hsa_isa_compatible_null_result);

/*
 * Entry point of the core API test executable.
 *
 * Invokes INITIALIZE_TESTSUITE(), registers the API tests with ADD_TEST() in
 * the order listed below, and finally invokes RUN_TESTS().
 * NOTE(review): argc/argv are not referenced directly in this body, and no
 * explicit return statement follows RUN_TESTS(); whether the macros consume
 * the arguments or produce the process exit status cannot be determined from
 * this file section -- confirm against the macro definitions.
 */
int main(int argc, char* argv[]) {
    INITIALIZE_TESTSUITE();
    ADD_TEST(hsa_init);
    // hsa_init_MAX is declared in hsa_api.h but is intentionally left
    // unregistered here. TODO(review): the reason is not recorded.
    // ADD_TEST(hsa_init_MAX);
    ADD_TEST(hsa_shut_down);
    ADD_TEST(hsa_shut_down_not_initialized);
    ADD_TEST(hsa_shut_down_after_shut_down);
    ADD_TEST(hsa_status_string);
    ADD_TEST(hsa_status_string_not_initialized);
    ADD_TEST(hsa_status_string_invalid_status);
    ADD_TEST(hsa_status_string_invalid_ptr);
    ADD_TEST(hsa_iterate_agents);
    ADD_TEST(hsa_iterate_agents_not_initialized);
    ADD_TEST(hsa_iterate_agents_invalid_callback);
    ADD_TEST(hsa_agent_get_info);
    ADD_TEST(hsa_agent_get_info_not_initialized);
    ADD_TEST(hsa_agent_get_info_invalid_agent);
    ADD_TEST(hsa_agent_get_info_invalid_attribute);
    ADD_TEST(hsa_agent_get_info_invalid_ptr);
    ADD_TEST(hsa_agent_extension_supported);
    ADD_TEST(hsa_agent_extension_supported_not_initialized);
    ADD_TEST(hsa_agent_extension_supported_invalid_agent);
    ADD_TEST(hsa_agent_extension_supported_invalid_extension);
    ADD_TEST(hsa_agent_extension_supported_null_result_ptr);
    ADD_TEST(hsa_agent_get_exception_policies);
    ADD_TEST(hsa_agent_get_exception_policies_not_initialized);
    ADD_TEST(hsa_agent_get_exception_policies_invalid_agent);
    ADD_TEST(hsa_agent_get_exception_policies_null_mask_ptr);
    ADD_TEST(hsa_agent_get_exception_policies_invalid_profile);
    ADD_TEST(hsa_system_extension_supported);
    ADD_TEST(hsa_system_extension_supported_not_initialized);
    ADD_TEST(hsa_system_extension_supported_invalid_extension);
    ADD_TEST(hsa_system_extension_supported_null_result_ptr);
    ADD_TEST(hsa_system_get_extension_table);
    ADD_TEST(hsa_system_get_extension_table_not_initialized);
    ADD_TEST(hsa_system_get_extension_table_invalid_extension);
    ADD_TEST(hsa_system_get_extension_table_null_table_ptr);
    ADD_TEST(hsa_system_get_info);
    ADD_TEST(hsa_system_get_info_not_initialized);
    ADD_TEST(hsa_system_get_info_invalid_attribute);
    ADD_TEST(hsa_system_get_info_invalid_ptr);
    ADD_TEST(hsa_signal_create);
    ADD_TEST(hsa_signal_create_not_initialized);
    ADD_TEST(hsa_signal_create_null_signal);
    ADD_TEST(hsa_signal_create_invalid_arg);
    ADD_TEST(hsa_signal_destroy);
    ADD_TEST(hsa_signal_destroy_not_initialized);
    ADD_TEST(hsa_signal_destroy_invalid_arg);
    ADD_TEST(hsa_signal_destroy_invalid_signal);
    ADD_TEST(hsa_signal_load_acquire);
    ADD_TEST(hsa_signal_load_relaxed);
    ADD_TEST(hsa_signal_store_release);
    ADD_TEST(hsa_signal_store_relaxed);
    ADD_TEST(hsa_signal_exchange_acq_rel);
    ADD_TEST(hsa_signal_exchange_acquire);
    ADD_TEST(hsa_signal_exchange_relaxed);
    ADD_TEST(hsa_signal_exchange_release);
    ADD_TEST(hsa_signal_cas_acq_rel);
    ADD_TEST(hsa_signal_cas_acquire);
    ADD_TEST(hsa_signal_cas_relaxed);
    ADD_TEST(hsa_signal_cas_release);
    ADD_TEST(hsa_signal_add_acq_rel);
    ADD_TEST(hsa_signal_add_acquire);
    ADD_TEST(hsa_signal_add_relaxed);
    ADD_TEST(hsa_signal_add_release);
    ADD_TEST(hsa_signal_subtract_acq_rel);
    ADD_TEST(hsa_signal_subtract_acquire);
    ADD_TEST(hsa_signal_subtract_relaxed);
    ADD_TEST(hsa_signal_subtract_release);
    ADD_TEST(hsa_signal_and_acq_rel);
    ADD_TEST(hsa_signal_and_acquire);
    ADD_TEST(hsa_signal_and_relaxed);
    ADD_TEST(hsa_signal_and_release);
    ADD_TEST(hsa_signal_or_acq_rel);
    ADD_TEST(hsa_signal_or_acquire);
    ADD_TEST(hsa_signal_or_relaxed);
    ADD_TEST(hsa_signal_or_release);
    ADD_TEST(hsa_signal_xor_acq_rel);
    ADD_TEST(hsa_signal_xor_acquire);
    ADD_TEST(hsa_signal_xor_relaxed);
    ADD_TEST(hsa_signal_xor_release);
    ADD_TEST(hsa_queue_create);
    ADD_TEST(hsa_queue_create_not_initialized);
    ADD_TEST(hsa_queue_create_out_of_resources);
    ADD_TEST(hsa_queue_create_invalid_agent);
    ADD_TEST(hsa_queue_create_invalid_queue_creation);
    ADD_TEST(hsa_queue_create_invalid_argument);
    ADD_TEST(hsa_queue_destroy);
    ADD_TEST(hsa_queue_destroy_not_initialized);
    ADD_TEST(hsa_queue_destroy_invalid_queue);
    ADD_TEST(hsa_queue_destroy_invalid_argument);
    ADD_TEST(hsa_queue_inactivate);
    ADD_TEST(hsa_queue_inactivate_not_initialized);
    ADD_TEST(hsa_queue_inactivate_invalid_queue);
    ADD_TEST(hsa_queue_inactivate_invalid_argument);
    ADD_TEST(hsa_queue_load_read_index_acquire);
    ADD_TEST(hsa_queue_load_read_index_relaxed);
    ADD_TEST(hsa_queue_load_store_write_index_acquire_relaxed);
    ADD_TEST(hsa_queue_load_store_write_index_relaxed_release);
    ADD_TEST(hsa_queue_cas_write_index_acq_rel);
    ADD_TEST(hsa_queue_cas_write_index_acquire);
    ADD_TEST(hsa_queue_cas_write_index_relaxed);
    ADD_TEST(hsa_queue_cas_write_index_release);
    ADD_TEST(hsa_queue_add_write_index_acq_rel);
    ADD_TEST(hsa_queue_add_write_index_acquire);
    ADD_TEST(hsa_queue_add_write_index_relaxed);
    ADD_TEST(hsa_queue_add_write_index_release);
    ADD_TEST(hsa_memory_allocate);
    ADD_TEST(hsa_memory_allocate_not_initialized);
    ADD_TEST(hsa_memory_allocate_null_ptr);
    ADD_TEST(hsa_memory_allocate_zero_size);
    ADD_TEST(hsa_memory_allocate_invalid_allocation);
    ADD_TEST(hsa_memory_allocate_invalid_region);
    ADD_TEST(hsa_memory_free);
    ADD_TEST(hsa_memory_free_not_initialized);
    ADD_TEST(hsa_memory_register);
    ADD_TEST(hsa_memory_register_not_initialized);
    ADD_TEST(hsa_memory_register_invalid_argument);
    ADD_TEST(hsa_memory_deregister);
    ADD_TEST(hsa_memory_deregister_not_initialized);
    ADD_TEST(hsa_region_get_info);
    ADD_TEST(hsa_region_get_info_not_initialized);
    ADD_TEST(hsa_region_get_info_invalid_region);
    ADD_TEST(hsa_region_get_info_invalid_argument);
    ADD_TEST(hsa_agent_iterate_regions);
    ADD_TEST(hsa_agent_iterate_regions_not_initialized);
    ADD_TEST(hsa_agent_iterate_regions_invalid_argument);
    ADD_TEST(hsa_agent_iterate_regions_invalid_agent);
    ADD_TEST(hsa_isa_get_info);
    ADD_TEST(hsa_isa_get_info_not_initialized);
    ADD_TEST(hsa_isa_get_info_invalid_isa);
    ADD_TEST(hsa_isa_get_info_index_out_of_range);
    ADD_TEST(hsa_isa_get_info_invalid_attribute);
    ADD_TEST(hsa_isa_get_info_invalid_null_value);
    ADD_TEST(hsa_code_object_get_info);
    ADD_TEST(hsa_code_symbol_get_info);
    ADD_TEST(hsa_executable_create);
    ADD_TEST(hsa_executable_create_not_initialized);
    ADD_TEST(hsa_executable_create_invalid_argument);
    ADD_TEST(hsa_executable_create_out_of_resources);
    ADD_TEST(hsa_executable_destroy);
    ADD_TEST(hsa_executable_destroy_not_initialized);
    ADD_TEST(hsa_executable_destroy_invalid_executable);
    ADD_TEST(hsa_executable_load_code_object);
    ADD_TEST(hsa_executable_load_code_object_not_initialized);
    ADD_TEST(hsa_executable_load_code_object_invalid_executable);
    ADD_TEST(hsa_executable_load_code_object_invalid_agent);
    ADD_TEST(hsa_executable_load_code_object_invalid_code_object);
    ADD_TEST(hsa_executable_load_code_object_frozen_executable);
    ADD_TEST(hsa_executable_get_info);
    ADD_TEST(hsa_executable_symbol_get_info);
    ADD_TEST(hsa_soft_queue_create);
    ADD_TEST(hsa_isa_from_name);
    ADD_TEST(hsa_isa_from_name_null_name);
    ADD_TEST(hsa_isa_from_name_null_isa);
    ADD_TEST(hsa_isa_from_name_invalid_isa_name);
    ADD_TEST(hsa_isa_compatible);
    ADD_TEST(hsa_isa_compatible_invalid_isa);
    ADD_TEST(hsa_isa_compatible_null_result);
    RUN_TESTS();
}
diff --git a/src/core/api/hsa_api.h b/src/core/api/hsa_api.h
new file mode 100644
index 0000000..fe42955
--- /dev/null
+++ b/src/core/api/hsa_api.h
@@ -0,0 +1,207 @@
/*
 * =============================================================================
 * HSA Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2014, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 * AMD Research and AMD HSA Software Development
 *
 * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#ifndef _HSA_API_H_ +#define _HSA_API_H_ + +extern int test_hsa_init(); +extern int test_hsa_init_MAX(); +extern int test_hsa_shut_down(); +extern int test_hsa_shut_down_not_initialized(); +extern int test_hsa_shut_down_after_shut_down(); +extern int test_hsa_status_string(); +extern int test_hsa_status_string_not_initialized(); +extern int test_hsa_status_string_invalid_status(); +extern int test_hsa_status_string_invalid_ptr(); +extern int test_hsa_iterate_agents(); +extern int test_hsa_iterate_agents_not_initialized(); +extern int test_hsa_iterate_agents_invalid_callback(); +extern int test_hsa_agent_get_info(); +extern int test_hsa_agent_get_info_not_initialized(); +extern int test_hsa_agent_get_info_invalid_agent(); +extern int test_hsa_agent_get_info_invalid_attribute(); +extern int test_hsa_agent_get_info_invalid_ptr(); +extern int test_hsa_agent_extension_supported(); +extern int test_hsa_agent_extension_supported_not_initialized(); +extern int test_hsa_agent_extension_supported_invalid_agent(); +extern int test_hsa_agent_extension_supported_invalid_extension(); +extern int test_hsa_agent_extension_supported_null_result_ptr(); +extern int test_hsa_agent_get_exception_policies(); +extern int test_hsa_agent_get_exception_policies_not_initialized(); +extern int test_hsa_agent_get_exception_policies_invalid_agent(); +extern int test_hsa_agent_get_exception_policies_null_mask_ptr(); +extern int test_hsa_agent_get_exception_policies_invalid_profile(); +extern int test_hsa_system_extension_supported(); +extern int test_hsa_system_extension_supported_not_initialized(); +extern int test_hsa_system_extension_supported_invalid_extension(); +extern int test_hsa_system_extension_supported_null_result_ptr(); +extern int test_hsa_system_get_extension_table(); +extern int test_hsa_system_get_extension_table_not_initialized(); +extern int test_hsa_system_get_extension_table_invalid_extension(); +extern int test_hsa_system_get_extension_table_null_table_ptr(); 
+extern int test_hsa_system_get_info(); +extern int test_hsa_system_get_info_not_initialized(); +extern int test_hsa_system_get_info_invalid_attribute(); +extern int test_hsa_system_get_info_invalid_ptr(); +extern int test_hsa_signal_create(); +extern int test_hsa_signal_create_not_initialized(); +extern int test_hsa_signal_create_null_signal(); +extern int test_hsa_signal_create_invalid_arg(); +extern int test_hsa_signal_destroy(); +extern int test_hsa_signal_destroy_not_initialized(); +extern int test_hsa_signal_destroy_invalid_arg(); +extern int test_hsa_signal_destroy_invalid_signal(); +extern int test_hsa_signal_load_acquire(); +extern int test_hsa_signal_load_relaxed(); +extern int test_hsa_signal_store_release(); +extern int test_hsa_signal_store_relaxed(); +extern int test_hsa_signal_exchange_acq_rel(); +extern int test_hsa_signal_exchange_acquire(); +extern int test_hsa_signal_exchange_relaxed(); +extern int test_hsa_signal_exchange_release(); +extern int test_hsa_signal_cas_acq_rel(); +extern int test_hsa_signal_cas_acquire(); +extern int test_hsa_signal_cas_relaxed(); +extern int test_hsa_signal_cas_release(); +extern int test_hsa_signal_add_acq_rel(); +extern int test_hsa_signal_add_acquire(); +extern int test_hsa_signal_add_relaxed(); +extern int test_hsa_signal_add_release(); +extern int test_hsa_signal_subtract_acq_rel(); +extern int test_hsa_signal_subtract_acquire(); +extern int test_hsa_signal_subtract_relaxed(); +extern int test_hsa_signal_subtract_release(); +extern int test_hsa_signal_and_acq_rel(); +extern int test_hsa_signal_and_acquire(); +extern int test_hsa_signal_and_relaxed(); +extern int test_hsa_signal_and_release(); +extern int test_hsa_signal_or_acq_rel(); +extern int test_hsa_signal_or_acquire(); +extern int test_hsa_signal_or_relaxed(); +extern int test_hsa_signal_or_release(); +extern int test_hsa_signal_xor_acq_rel(); +extern int test_hsa_signal_xor_acquire(); +extern int test_hsa_signal_xor_relaxed(); +extern int 
test_hsa_signal_xor_release(); +extern int test_hsa_queue_create(); +extern int test_hsa_queue_create_not_initialized(); +extern int test_hsa_queue_create_out_of_resources(); +extern int test_hsa_queue_create_invalid_agent(); +extern int test_hsa_queue_create_invalid_queue_creation(); +extern int test_hsa_queue_create_invalid_argument(); +extern int test_hsa_queue_destroy(); +extern int test_hsa_queue_destroy_not_initialized(); +extern int test_hsa_queue_destroy_invalid_queue(); +extern int test_hsa_queue_destroy_invalid_argument(); +extern int test_hsa_queue_inactivate(); +extern int test_hsa_queue_inactivate_not_initialized(); +extern int test_hsa_queue_inactivate_invalid_queue(); +extern int test_hsa_queue_inactivate_invalid_argument(); +extern int test_hsa_queue_load_read_index_acquire(); +extern int test_hsa_queue_load_read_index_relaxed(); +extern int test_hsa_queue_load_store_write_index_acquire_relaxed(); +extern int test_hsa_queue_load_store_write_index_relaxed_release(); +extern int test_hsa_queue_cas_write_index_acq_rel(); +extern int test_hsa_queue_cas_write_index_acquire(); +extern int test_hsa_queue_cas_write_index_relaxed(); +extern int test_hsa_queue_cas_write_index_release(); +extern int test_hsa_queue_add_write_index_acq_rel(); +extern int test_hsa_queue_add_write_index_acquire(); +extern int test_hsa_queue_add_write_index_relaxed(); +extern int test_hsa_queue_add_write_index_release(); +extern int test_hsa_memory_allocate_not_initialized(); +extern int test_hsa_memory_allocate_null_ptr(); +extern int test_hsa_memory_allocate_zero_size(); +extern int test_hsa_memory_allocate_invalid_allocation(); +extern int test_hsa_memory_allocate_invalid_region(); +extern int test_hsa_memory_allocate(); +extern int test_hsa_memory_free(); +extern int test_hsa_memory_free_not_initialized(); +extern int test_hsa_memory_register(); +extern int test_hsa_memory_register_not_initialized(); +extern int test_hsa_memory_register_invalid_argument(); +extern int 
test_hsa_memory_deregister(); +extern int test_hsa_memory_deregister_not_initialized(); +extern int test_hsa_region_get_info(); +extern int test_hsa_region_get_info_not_initialized(); +extern int test_hsa_region_get_info_invalid_region(); +extern int test_hsa_region_get_info_invalid_argument(); +extern int test_hsa_agent_iterate_regions(); +extern int test_hsa_agent_iterate_regions_not_initialized(); +extern int test_hsa_agent_iterate_regions_invalid_argument(); +extern int test_hsa_agent_iterate_regions_invalid_agent(); +extern int test_hsa_isa_get_info(); +extern int test_hsa_isa_get_info_not_initialized(); +extern int test_hsa_isa_get_info_invalid_isa(); +extern int test_hsa_isa_get_info_index_out_of_range(); +extern int test_hsa_isa_get_info_invalid_attribute(); +extern int test_hsa_isa_get_info_invalid_null_value(); +extern int test_hsa_code_object_get_info(); +extern int test_hsa_code_symbol_get_info(); +extern int test_hsa_executable_create(); +extern int test_hsa_executable_create_not_initialized(); +extern int test_hsa_executable_create_invalid_argument(); +extern int test_hsa_executable_create_out_of_resources(); +extern int test_hsa_executable_destroy(); +extern int test_hsa_executable_destroy_not_initialized(); +extern int test_hsa_executable_destroy_invalid_executable(); +extern int test_hsa_executable_load_code_object(); +extern int test_hsa_executable_load_code_object_not_initialized(); +extern int test_hsa_executable_load_code_object_invalid_executable(); +extern int test_hsa_executable_load_code_object_invalid_agent(); +extern int test_hsa_executable_load_code_object_invalid_code_object(); +extern int test_hsa_executable_load_code_object_frozen_executable(); +extern int test_hsa_executable_get_info(); +extern int test_hsa_executable_symbol_get_info(); +extern int test_hsa_soft_queue_create(); +extern int test_hsa_isa_from_name(); +extern int test_hsa_isa_from_name_null_name(); +extern int test_hsa_isa_from_name_null_isa(); +extern int 
test_hsa_isa_from_name_invalid_isa_name(); +extern int test_hsa_isa_compatible(); +extern int test_hsa_isa_compatible_invalid_isa(); +extern int test_hsa_isa_compatible_null_result(); + +#endif // _HSA_API_H_ diff --git a/src/core/api/test_helper_func.c b/src/core/api/test_helper_func.c new file mode 100644 index 0000000..c7aa40c --- /dev/null +++ b/src/core/api/test_helper_func.c @@ -0,0 +1,538 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +// Callback function to get the number of agents +hsa_status_t callback_get_num_agents(hsa_agent_t agent, void* data) { + int *num_agents = (int *)data; + (*num_agents)++; + return HSA_STATUS_SUCCESS; +} + +// Callback function to get the list of agents +hsa_status_t callback_get_agents(hsa_agent_t agent, void* data) { + hsa_agent_t **agent_list = (hsa_agent_t **)data; + **agent_list = agent; + (*agent_list)++; + return HSA_STATUS_SUCCESS; +} + +// Callback function to get the first agent that supports kernel dispatch +hsa_status_t callback_get_kernel_dispatch_agent(hsa_agent_t agent, void* data) { + hsa_status_t status; + hsa_agent_feature_t feature; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &feature); + if (HSA_STATUS_SUCCESS == status && + HSA_AGENT_FEATURE_KERNEL_DISPATCH == feature) { + hsa_agent_t* ret = (hsa_agent_t*)data; + *ret = agent; + return HSA_STATUS_INFO_BREAK; + } + return HSA_STATUS_SUCCESS; +} + +// Check if the input is the power of two +char isPowerOfTwo_local(uint32_t x) { + while (((x & 1) == 0) && x > 1) /* While x is even and > 1 */ + x >>= 1; + return (x == 1); +} + +// Check if every attribute of the agent is valid +void check_attributes(hsa_agent_t agent) { + hsa_status_t status; + char name[64]; + int ii; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name); + ASSERT(HSA_STATUS_SUCCESS == status); + + char 
vendor_name[64]; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, vendor_name); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t feature = 0; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(feature == HSA_AGENT_FEATURE_KERNEL_DISPATCH || + feature == HSA_AGENT_FEATURE_AGENT_DISPATCH); + + if (HSA_AGENT_FEATURE_KERNEL_DISPATCH == feature) { + uint32_t wavefront_size = 0; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_WAVEFRONT_SIZE, + &wavefront_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT_MSG((wavefront_size >= 1) && + (wavefront_size <= 256) && + isPowerOfTwo_local(wavefront_size), + "Error: wavefront_size = %u", wavefront_size); + + uint32_t workgroup_max_size; + status = + hsa_agent_get_info(agent, + HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, + &workgroup_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT_MSG(workgroup_max_size > 0, + "Error: workgroup_max_size = %u", + workgroup_max_size); + + uint16_t workgroup_max_dim[3]; + workgroup_max_dim[0] = 0; + workgroup_max_dim[1] = 0; + workgroup_max_dim[2] = 0; + status = + hsa_agent_get_info(agent, + HSA_AGENT_INFO_WORKGROUP_MAX_DIM, + workgroup_max_dim); + ASSERT(HSA_STATUS_SUCCESS == status); + for (ii = 0; ii < 3; ii++) { + ASSERT_MSG((workgroup_max_dim[ii] > 0 && + workgroup_max_dim[ii] <= workgroup_max_size), + "Error: workgroup_max_dim[%d] = %u", + ii, + workgroup_max_dim[ii]); + } + + uint32_t grid_max_size; + status = + hsa_agent_get_info(agent, + HSA_AGENT_INFO_GRID_MAX_SIZE, + &grid_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(grid_max_size > 0); + + hsa_dim3_t grid_max_dim; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_GRID_MAX_DIM, + &grid_max_dim); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(grid_max_dim.x > 0 && + grid_max_dim.x >= workgroup_max_dim[0] && + grid_max_dim.x <= grid_max_size); + ASSERT(grid_max_dim.y > 0 && + grid_max_dim.y >= workgroup_max_dim[1] && 
+ grid_max_dim.y <= grid_max_size); + ASSERT(grid_max_dim.z > 0 && + grid_max_dim.z >= workgroup_max_dim[2] && + grid_max_dim.z <= grid_max_size); + + uint32_t fbarriers_max_size; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_FBARRIER_MAX_SIZE, + &fbarriers_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT_MSG(fbarriers_max_size >= 32, + "Error: fbarriers_max_size = %u", fbarriers_max_size); + } + + uint32_t queues_max; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &queues_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t queue_max_size; + status = hsa_agent_get_info(agent, + HSA_AGENT_INFO_QUEUE_MAX_SIZE, + &queue_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(queue_max_size > 0 && isPowerOfTwo_local(queue_max_size)); + + uint32_t node; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &node); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_device_type_t device; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(device == HSA_DEVICE_TYPE_GPU || + device == HSA_DEVICE_TYPE_CPU || + device == HSA_DEVICE_TYPE_DSP); + + uint32_t cache_size[4]; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, cache_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (device == HSA_DEVICE_TYPE_GPU) { + uint32_t image1d_max_elems; + status = hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS, + &image1d_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image1d_max_elems <= 16384); + + uint32_t image1da_max_elems; + status = hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS, + &image1da_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image1da_max_elems <= 16384); + + uint32_t image1db_max_elems; + status = hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS, + &image1db_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image1db_max_elems <= 65536); + 
+ uint32_t image2d_max_elems[2]; + status = hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS, + image2d_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image2d_max_elems[0] <= 16384 && image2d_max_elems[1] <= 16384); + + uint32_t image2da_max_elems[2]; + status = hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS, + image2da_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image2da_max_elems[0] <= 16384 && + image2da_max_elems[1] <= 16384); + + uint32_t image2dd_max_elems[2]; + status = + hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS, + image2dd_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image2dd_max_elems[0] <= 16384 && + image2dd_max_elems[1] <= 16384); + + uint32_t image2dad_max_elems[2]; + status = + hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS, + image2dad_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image2dad_max_elems[0] <= 16384 && + image2dad_max_elems[1] <= 16384); + + uint32_t image3d_max_elems[3]; + status = hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS, + image3d_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image3d_max_elems[0] <= 2048 && + image3d_max_elems[1] <= 2048 && + image3d_max_elems[2] <= 2048); + + + uint32_t image_array_max_layers; + status = hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS, + &image_array_max_layers); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image_array_max_layers <= 2048); + + uint32_t image_rd_max; + status = hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES, + &image_rd_max); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image_rd_max >= 128); + + uint32_t image_rorw_max; + status = hsa_agent_get_info(agent, + HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES, + &image_rorw_max); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image_rorw_max >= 64); + + uint32_t sampler_max; + status = 
hsa_agent_get_info(agent,
+                                HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS,
+                                &sampler_max);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT(sampler_max >= 16);
+  }
+
+  return;
+}
+
+// Agent-iteration callback: runs check_attributes() on the agent and always
+// returns HSA_STATUS_SUCCESS so that the iteration continues.
+hsa_status_t callback_check_agents(hsa_agent_t agent, void* data) {
+  (void)data;  // Unused.
+
+  // Check attributes of the agent
+  check_attributes(agent);
+
+  // Keep iterating
+  return HSA_STATUS_SUCCESS;
+}
+
+// Runs callback_check_agents over every agent and asserts that the
+// iteration itself succeeded, printing the status code and string on failure.
+void check_agents() {
+  hsa_status_t status;
+  // Fallback text so the message below never prints an unset pointer if
+  // hsa_status_string() does not fill err_str in.
+  const char *err_str = "unknown";
+  status = hsa_iterate_agents(callback_check_agents, NULL);
+  hsa_status_string(status, &err_str);
+  ASSERT_MSG(HSA_STATUS_SUCCESS == status,
+             "\nErr_code: %d Err_Str: %s\n",
+             status, err_str);
+  return;
+}
+
+// Callback function to get the number of regions of an agent.
+// data points to an int counter that is incremented once per region.
+hsa_status_t callback_get_num_regions(hsa_region_t region, void* data) {
+  (void)region;  // Only the number of invocations matters.
+  int *num_regions = (int *)data;
+  (*num_regions)++;
+  return HSA_STATUS_SUCCESS;
+}
+
+// Callback function to get the list of regions of an agent.
+// data points to a cursor (hsa_region_t*) into a caller-provided array; the
+// region is stored at the cursor and the cursor is advanced by one element.
+hsa_status_t callback_get_regions(hsa_region_t region, void* data) {
+  hsa_region_t **region_list = (hsa_region_t **)data;
+  **region_list = region;
+  (*region_list)++;
+  return HSA_STATUS_SUCCESS;
+}
+
+// Callback function to get a global memory region that allows allocation.
+// When the region is in the global segment and permits runtime allocation it
+// is written to the hsa_region_t that data points to and the iteration is
+// stopped with HSA_STATUS_INFO_BREAK; otherwise iteration continues.
+hsa_status_t callback_get_region_global_allocatable(hsa_region_t region,
+                                                    void* data) {
+  hsa_status_t status;
+  hsa_region_segment_t segment;
+  status = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment);
+  ASSERT(HSA_STATUS_SUCCESS == status);
+  bool runtime_alloc_allowed;
+  status = hsa_region_get_info(region,
+                               HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED,
+                               &runtime_alloc_allowed);
+  ASSERT(HSA_STATUS_SUCCESS == status);
+  if (HSA_REGION_SEGMENT_GLOBAL == segment && runtime_alloc_allowed) {
+    hsa_region_t* ret = (hsa_region_t*)data;
+    *ret = region;
+    return HSA_STATUS_INFO_BREAK;
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
+// Dispatches num_packets copies of the "&__no_op_kernel" kernel loaded from
+// no_op.brig on the given queue and blocks until every completion signal
+// has reached 0.
+//
+//   agent       - agent the module is finalized for.
+//   queue       - queue the dispatch packets are written to.
+//   num_packets - number of dispatch packets to enqueue.
+void launch_no_op_kernels(hsa_agent_t* agent,
+                          hsa_queue_t* queue,
+                          int num_packets) {
+  hsa_status_t status;
+
+  // Load the BRIG module
+  hsa_ext_module_t module;
+  ASSERT(0 == load_module_from_file("no_op.brig", &module));
+
+  // Finalize the executable
+  hsa_ext_control_directives_t control_directives;
+  memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+  hsa_code_object_t code_object;
+  hsa_executable_t executable;
+
+  status = finalize_executable(*agent,
+                               1,
+                               &module,
+                               HSA_MACHINE_MODEL_LARGE,
+                               HSA_PROFILE_FULL,
+                               HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                               HSA_CODE_OBJECT_TYPE_PROGRAM,
+                               0,
+                               control_directives,
+                               &code_object,
+                               &executable);
+
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // Get the symbol and the symbol info
+  symbol_record_t symbol_record;
+  memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+  char* symbol_names[1];
+  symbol_names[0] = "&__no_op_kernel";
+  status = get_executable_symbols(executable,
+                                  *agent,
+                                  0,
+                                  1,
+                                  symbol_names,
+                                  &symbol_record);
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // Signal and dispatch packet
+  hsa_signal_t* signals =
+      (hsa_signal_t*) malloc(sizeof(hsa_signal_t) * num_packets);
+  // A failed allocation must not be dereferenced below.
+  ASSERT(num_packets <= 0 || NULL != signals);
+  hsa_kernel_dispatch_packet_t dispatch_packet;
+
+  int jj;
+  for (jj = 0; jj < num_packets; ++jj) {
+    status = hsa_signal_create(1, 0, NULL, &signals[jj]);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+  }
+
+  // Get size of dispatch_packet
+  const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t);
+
+  // Fill info for the default dispatch_packet
+  memset(&dispatch_packet, 0, packet_size);
+  dispatch_packet.header |=
+      HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  dispatch_packet.header |=
+      HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+  dispatch_packet.setup |=
+      1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+  dispatch_packet.workgroup_size_x = 256;
+  dispatch_packet.workgroup_size_y = 1;
+  dispatch_packet.workgroup_size_z = 1;
+  dispatch_packet.grid_size_x = 256;
+  dispatch_packet.grid_size_y = 1;
+  dispatch_packet.grid_size_z = 1;
+  dispatch_packet.group_segment_size = symbol_record.group_segment_size;
+  dispatch_packet.private_segment_size = symbol_record.private_segment_size;
+  dispatch_packet.kernel_object = symbol_record.kernel_object;
+  dispatch_packet.kernarg_address = 0;
+
+  // The queue is a ring buffer of packets. Its size is a power of two, so a
+  // monotonically increasing packet index maps to a slot by masking.
+  hsa_kernel_dispatch_packet_t* queue_base =
+      (hsa_kernel_dispatch_packet_t*) queue->base_address;
+  const uint64_t queue_mask = (uint64_t) queue->size - 1;
+
+  // Enqueue dispatch packets
+  hsa_kernel_dispatch_packet_t* queue_packet;
+  for (jj = 0; jj < num_packets; ++jj) {
+    // Increment the write index of the queue
+    uint64_t write_index = hsa_queue_add_write_index_relaxed(queue, 1);
+    // Wait for a free slot so that a packet that has not been consumed yet
+    // is never overwritten when more packets are requested than the queue
+    // can hold.
+    while (write_index - hsa_queue_load_read_index_relaxed(queue) >=
+           queue->size) {
+    }
+    // Set the value of the dispatch packet to the correct signal
+    dispatch_packet.completion_signal = signals[jj];
+    // Obtain the address of the queue packet entry (wrapped to the ring)
+    queue_packet = &queue_base[write_index & queue_mask];
+    // Copy the initialized packet to the queue packet entry
+    memcpy(queue_packet, &dispatch_packet, packet_size);
+    // Set the queue packet entries header.type value
+    // to HSA_PACKET_TYPE_KERNEL_DISPATCH
+    // This allows the command processor to process this packet.
+    // NOTE(review): the header is published with a plain store after the
+    // memcpy; consider an atomic release store of the complete header.
+    queue_packet->header |=
+        HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+    // Ring the doorbell
+    hsa_signal_store_relaxed(queue->doorbell_signal, write_index);
+  }
+
+  // Wait until all dispatch packets finish executing
+  for (jj = 0; jj < num_packets; ++jj) {
+    hsa_signal_value_t value =
+        hsa_signal_wait_relaxed(signals[jj],
+                                HSA_SIGNAL_CONDITION_EQ,
+                                0,
+                                UINT64_MAX,
+                                HSA_WAIT_STATE_BLOCKED);
+    ASSERT(0 == value);
+  }
+
+  // Destroy signals
+  for (jj = 0; jj < num_packets; ++jj) {
+    status = hsa_signal_destroy(signals[jj]);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+  }
+  free(signals);
+
+  // Destroy the loaded module
+  destroy_module(module);
+
+  // Destroy the executable
+  hsa_executable_destroy(executable);
+
+  // Destroy the code object
+  hsa_code_object_destroy(code_object);
+
+  return;
+}
+
+// Currently a stub: the implementation below is commented out, so the
+// arguments are ignored and a code object whose handle is 0 is returned.
+hsa_code_object_t load_code_object(hsa_agent_t* agent,
+                                   char* file_name,
+                                   char* kernel_name) {
+  hsa_status_t status;
+
+/*
+  // Load the BRIG module for the target kernel.
+  hsa_ext_brig_module_t* brig_module;
+  brig_utils_status_t brig_status;
+  brig_status = create_brig_module_from_brig_file(file_name, &brig_module);
+  ASSERT(BRIG_UTILS_STATUS_SUCCESS == brig_status);
+
+  // Create HSA program
+  hsa_ext_program_handle_t program;
+  status = hsa_ext_program_create(agent, 1, HSA_EXT_BRIG_MACHINE_LARGE, HSA_EXT_BRIG_PROFILE_FULL, &program);
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // Add the Brig module to HSA program
+  hsa_ext_brig_module_handle_t module;
+  status = hsa_ext_add_module(program, brig_module, &module);
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // Construct finalization request list.
+  hsa_ext_finalization_request_t finalization_request_list;
+  finalization_request_list.module = module;
+  finalization_request_list.program_call_convention = 0;
+  brig_status = find_symbol_offset(brig_module, kernel_name, &finalization_request_list.symbol);
+  ASSERT(BRIG_UTILS_STATUS_SUCCESS == brig_status);
+
+  // Finalize the HSA program
+  status = hsa_ext_finalize_program(program, *agent, 1, &finalization_request_list, NULL, NULL, 0, NULL, 0);
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // Free the BRIG data loaded from the brig file.
+  brig_status = destroy_brig_module(brig_module);
+  ASSERT(BRIG_UTILS_STATUS_SUCCESS == brig_status);
+
+  // Get the HSA code descriptor address.
+  hsa_ext_code_descriptor_t* hsa_code_descriptor;
+  status = hsa_ext_query_kernel_descriptor_address(program, module, finalization_request_list.symbol, &hsa_code_descriptor);
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  hsa_code_object_t code_object;
+  code_object.handle = hsa_code_descriptor->code.handle;
+*/
+  hsa_code_object_t code_object;
+  code_object.handle = 0;
+  return code_object;
+}
+
+// Allocation callback: allocates size bytes with malloc and stores the
+// pointer in *address.
+//
+// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when address is NULL,
+// HSA_STATUS_ERROR_OUT_OF_RESOURCES when a non-empty allocation fails, and
+// HSA_STATUS_SUCCESS otherwise.
+hsa_status_t callback_serialize_alloc(size_t size,
+                                      hsa_callback_data_t data,
+                                      void** address) {
+  (void)data;  // Unused.
+  if (NULL == address) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+  *address = malloc(size);
+  // malloc(0) may legitimately return NULL, so only treat NULL as a failure
+  // when memory was actually requested.
+  if (NULL == *address && 0 != size) {
+    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
+  }
+  return HSA_STATUS_SUCCESS;
+}
diff --git a/src/core/api/test_helper_func.h b/src/core/api/test_helper_func.h
new file mode 100644
index 0000000..f654bda
--- /dev/null
+++ b/src/core/api/test_helper_func.h
@@ -0,0 +1,82 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+// Helper callbacks and routines shared by the core API tests.
+#ifndef _TEST_HELPER_FUNC_H_
+#define _TEST_HELPER_FUNC_H_
+
+#include
+
+// Agent-iteration callback.
+// NOTE(review): definition not in view; callers pass the address of an int
+// counter as data, so this presumably counts agents -- confirm.
+hsa_status_t callback_get_num_agents(hsa_agent_t agent, void* data);
+
+// Agent-iteration callback.
+// NOTE(review): definition not in view; callers pass the address of an
+// hsa_agent_t* cursor as data, so this presumably stores each agent through
+// the cursor -- confirm.
+hsa_status_t callback_get_agents(hsa_agent_t agent, void* data);
+
+// Agent-iteration callback that runs the attribute checks on the agent and
+// lets the iteration continue.
+hsa_status_t callback_check_agents(hsa_agent_t agent, void* data);
+
+// Agent-iteration callback.
+// NOTE(review): definition not in view; presumably selects an agent that
+// supports kernel dispatch -- confirm.
+hsa_status_t callback_get_kernel_dispatch_agent(hsa_agent_t agent, void* data);
+
+// Region-iteration callback: increments the int that data points to once per
+// region.
+hsa_status_t callback_get_num_regions(hsa_region_t region, void* data);
+
+// Region-iteration callback: data points to an hsa_region_t* cursor; the
+// region is stored at the cursor and the cursor is advanced.
+hsa_status_t callback_get_regions(hsa_region_t region, void* data);
+
+// Region-iteration callback: stores the first global-segment region that
+// allows runtime allocation in the hsa_region_t that data points to and
+// stops the iteration with HSA_STATUS_INFO_BREAK.
+hsa_status_t callback_get_region_global_allocatable(hsa_region_t region,
+                                                    void* data);
+
+// Iterates over all agents with callback_check_agents and asserts that the
+// iteration succeeded.
+void check_agents();
+
+// NOTE(review): definition not in view; presumably validates the system-wide
+// runtime attributes -- confirm.
+void check_system_info();
+
+// Enqueues num_packets dispatches of the no-op kernel from no_op.brig on
+// queue and waits for all of their completion signals.
+void launch_no_op_kernels(hsa_agent_t* agent,
+                          hsa_queue_t* queue,
+                          int num_packets);
+
+// Currently returns a code object whose handle is 0; the implementation body
+// is commented out.
+hsa_code_object_t load_code_object(hsa_agent_t* agent,
+                                   char* file_name,
+                                   char* kernel_name);
+
+// Allocation callback: allocates size bytes with malloc and stores the
+// pointer in *address.
+hsa_status_t callback_serialize_alloc(size_t size,
+                                      hsa_callback_data_t data,
+                                      void** address);
+
+#endif  // _TEST_HELPER_FUNC_H_
diff --git a/src/core/api/test_hsa_agent_extension_supported.c
b/src/core/api/test_hsa_agent_extension_supported.c new file mode 100644 index 0000000..faaf8cf --- /dev/null +++ b/src/core/api/test_hsa_agent_extension_supported.c @@ -0,0 +1,231 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include
+#include
+#include
+#include
+
+/**
+ *
+ * Test Name: hsa_agent_extension_supported
+ *
+ * Purpose:
+ * Verify that the hsa_agent_extension_supported API works as expected.
+ *
+ * Description:
+ *
+ * 1) After initializing the HSA runtime, call hsa_agent_extension_supported
+ *    with a known extension. Check that the return value is
+ *    HSA_STATUS_SUCCESS.
+ *
+ * 2) Before initializing the HSA runtime, call
+ *    hsa_agent_extension_supported. Check that the return value is
+ *    HSA_STATUS_ERROR_NOT_INITIALIZED.
+ *
+ * 3) Call hsa_agent_extension_supported with an invalid agent.
+ *    Check that the return value is HSA_STATUS_ERROR_INVALID_AGENT.
+ *
+ * 4) Call hsa_agent_extension_supported with an invalid extension.
+ *    Check that the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ * 5) Call hsa_agent_extension_supported with a NULL result parameter.
+ *    Check that the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1
+ *
+ * @Return
+ * int
+ *
+ */
+
+int test_hsa_agent_extension_supported() {
+  // Bring the runtime up.
+  hsa_status_t status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // Collect the agents to query.
+  struct agent_list_s agent_list;
+  get_agent_list(&agent_list);
+
+  // Ask every agent about the images extension.
+  bool supported;
+  int agent_idx;
+  for (agent_idx = 0; agent_idx < agent_list.num_agents; ++agent_idx) {
+    status = hsa_agent_extension_supported(HSA_EXTENSION_IMAGES,
+                                           agent_list.agents[agent_idx],
+                                           1,
+                                           0,
+                                           &supported);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+  }
+
+  // Tear the runtime down, then release the agent list.
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  free_agent_list(&agent_list);
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #2
+ *
+ * @Return
+ * int
+ *
+ */
+
+int test_hsa_agent_extension_supported_not_initialized() {
+  // The runtime is deliberately not initialized; any handle will do.
+  hsa_agent_t dummy_agent;
+  dummy_agent.handle = 0;
+
+  bool supported;
+  hsa_status_t status = hsa_agent_extension_supported(HSA_EXTENSION_IMAGES,
+                                                      dummy_agent,
+                                                      1,
+                                                      0,
+                                                      &supported);
+  ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status,
+             "The hsa_agent_extension_supported API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime was not initialized.\n");
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #3
+ *
+ * @Return
+ * int
+ *
+ */
+
+int test_hsa_agent_extension_supported_invalid_agent() {
+  hsa_status_t status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // An agent whose handle is 0 serves as the invalid agent.
+  hsa_agent_t bad_agent;
+  bad_agent.handle = 0;
+
+  bool supported;
+  status = hsa_agent_extension_supported(HSA_EXTENSION_IMAGES,
+                                         bad_agent,
+                                         1,
+                                         0,
+                                         &supported);
+  ASSERT_MSG(HSA_STATUS_ERROR_INVALID_AGENT == status,
+             "The hsa_agent_extension_supported API failed to return HSA_STATUS_ERROR_INVALID_AGENT when passed an invalid agent.\n");
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4
+ *
+ * @Return
+ * int
+ *
+ */
+
+int test_hsa_agent_extension_supported_invalid_extension() {
+  hsa_status_t status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  struct agent_list_s agent_list;
+  get_agent_list(&agent_list);
+
+  // -1 is passed as the (invalid) extension identifier for every agent.
+  bool supported;
+  int agent_idx;
+  for (agent_idx = 0; agent_idx < agent_list.num_agents; ++agent_idx) {
+    status = hsa_agent_extension_supported(-1,
+                                           agent_list.agents[agent_idx],
+                                           1,
+                                           0,
+                                           &supported);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status,
+               "The hsa_agent_extension_supported API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when an invalid extension was specified.\n");
+  }
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  free_agent_list(&agent_list);
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #5
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_agent_extension_supported_null_result_ptr() {
+  hsa_status_t status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  struct agent_list_s agent_list;
+  get_agent_list(&agent_list);
+
+  // Every agent must reject a NULL result pointer.
+  int agent_idx;
+  for (agent_idx = 0; agent_idx < agent_list.num_agents; ++agent_idx) {
+    status = hsa_agent_extension_supported(HSA_EXTENSION_IMAGES,
+                                           agent_list.agents[agent_idx],
+                                           1,
+                                           0,
+                                           NULL);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status,
+               "The hsa_agent_extension_supported API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when a NULL result pointer was used.\n");
+  }
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  free_agent_list(&agent_list);
+
+  return 0;
+}
diff --git a/src/core/api/test_hsa_agent_get_exception_policies.c b/src/core/api/test_hsa_agent_get_exception_policies.c
new file mode 100644
index 0000000..af800d1
--- /dev/null
+++ b/src/core/api/test_hsa_agent_get_exception_policies.c
@@ -0,0 +1,230 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include + +/** + * + * Test Name: hsa_agent_get_exception_policies + * + * Purpose: + * Verify that if the API hsa_agent_get_exception_policies API + * works as expected. + * + * Description: + * + * 1) Call hsa_agent_get_exception_policies API for a supported profile. 
+ *    Check if the return value is HSA_STATUS_SUCCESS.
+ *
+ * 2) Call hsa_agent_get_exception_policies API before initializing the runtime.
+ *    Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED.
+ *
+ * 3) Call hsa_agent_get_exception_policies API with an invalid agent.
+ *    Check if the return value is HSA_STATUS_ERROR_INVALID_AGENT.
+ *
+ * 4) Call hsa_agent_get_exception_policies API with a NULL mask parameter.
+ *    Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ * 5) Call hsa_agent_get_exception_policies API with an invalid profile.
+ *    Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1
+ *
+ * @Return
+ * int
+ *
+ */
+
+int test_hsa_agent_get_exception_policies() {
+  // Bring the runtime up.
+  hsa_status_t status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // Collect the agents to query.
+  struct agent_list_s agent_list;
+  get_agent_list(&agent_list);
+
+  // Query the full-profile exception policies of every agent.
+  uint16_t policy_mask;
+  int agent_idx;
+  for (agent_idx = 0; agent_idx < agent_list.num_agents; ++agent_idx) {
+    status = hsa_agent_get_exception_policies(agent_list.agents[agent_idx],
+                                              HSA_PROFILE_FULL,
+                                              &policy_mask);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+  }
+
+  // Tear the runtime down, then release the agent list.
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  free_agent_list(&agent_list);
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #2
+ *
+ * @Return
+ * int
+ *
+ */
+
+int test_hsa_agent_get_exception_policies_not_initialized() {
+  // The runtime is deliberately not initialized; any handle will do.
+  hsa_agent_t dummy_agent;
+  dummy_agent.handle = 0;
+
+  uint16_t policy_mask;
+  hsa_status_t status = hsa_agent_get_exception_policies(dummy_agent,
+                                                         HSA_PROFILE_FULL,
+                                                         &policy_mask);
+  ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status,
+             "The hsa_agent_get_exception_policies API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime was not initialized.\n");
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #3
+ *
+ * @Return
+ * int
+ *
+ */
+
+int test_hsa_agent_get_exception_policies_invalid_agent() {
+  hsa_status_t status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // An agent whose handle is 0 serves as the invalid agent.
+  hsa_agent_t bad_agent;
+  bad_agent.handle = 0;
+
+  uint16_t policy_mask;
+  status = hsa_agent_get_exception_policies(bad_agent,
+                                            HSA_PROFILE_FULL,
+                                            &policy_mask);
+  ASSERT_MSG(HSA_STATUS_ERROR_INVALID_AGENT == status,
+             "The hsa_agent_get_exception_policies API failed to return HSA_STATUS_ERROR_INVALID_AGENT when passed an invalid agent.\n");
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_agent_get_exception_policies_null_mask_ptr() {
+  hsa_status_t status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  struct agent_list_s agent_list;
+  get_agent_list(&agent_list);
+
+  // Every agent must reject a NULL mask pointer.
+  int agent_idx;
+  for (agent_idx = 0; agent_idx < agent_list.num_agents; ++agent_idx) {
+    status = hsa_agent_get_exception_policies(agent_list.agents[agent_idx],
+                                              HSA_PROFILE_FULL,
+                                              NULL);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status,
+               "The hsa_agent_get_exception_policies API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when a NULL mask pointer was used.\n");
+  }
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  free_agent_list(&agent_list);
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #5
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_agent_get_exception_policies_invalid_profile() {
+  hsa_status_t status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  struct agent_list_s agent_list;
+  get_agent_list(&agent_list);
+
+  // -1 is passed as the (invalid) profile for every agent.
+  uint16_t policy_mask;
+  int agent_idx;
+  for (agent_idx = 0; agent_idx < agent_list.num_agents; ++agent_idx) {
+    status = hsa_agent_get_exception_policies(agent_list.agents[agent_idx],
+                                              -1,
+                                              &policy_mask);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status,
+               "The hsa_agent_get_exception_policies API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when an invalid profile was specified.\n");
+  }
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  free_agent_list(&agent_list);
+
+  return 0;
+}
diff --git a/src/core/api/test_hsa_agent_get_info.c b/src/core/api/test_hsa_agent_get_info.c
new file mode 100644
index 0000000..d14a5eb
--- /dev/null
+++ b/src/core/api/test_hsa_agent_get_info.c
@@ -0,0 +1,361 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include
+#include
+#include
+#include
+
+/**
+ *
+ * Test Name: agent_get_info
+ *
+ * Purpose:
+ * Verify that the hsa_agent_get_info API works as expected.
+ *
+ * Description:
+ *
+ * 1) Iterate over all agents and check agent information.
+ *
+ * 2) Before the runtime is initialized call hsa_agent_get_info and check
+ *    that the return value is HSA_STATUS_ERROR_NOT_INITIALIZED.
+ *
+ * 3) Call hsa_agent_get_info with an invalid agent handle.
+ *    Check if the return value is HSA_STATUS_ERROR_INVALID_AGENT.
+ *
+ * 4) Call hsa_agent_get_info with invalid agent attribute.
+ *    Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ * 5) Call hsa_agent_get_info with a NULL value parameter.
+ *    Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_agent_get_info() {
+  hsa_status_t status;
+
+  status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  struct agent_list_s agent_list;
+
+  get_agent_list(&agent_list);
+
+  // Iterate through the attributes of all of the agents
+  int ii;
+  for (ii = 0; ii < agent_list.num_agents; ++ii) {
+    hsa_agent_t agent = agent_list.agents[ii];
+
+    char name[64];
+
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's name.\n");
+
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, name);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's vendor name.\n");
+
+    hsa_agent_feature_t agent_feature;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &agent_feature);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get any agent features.\n");
+
+    hsa_machine_model_t machine_model;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's machine model.\n");
+
+    hsa_profile_t profile;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's profile.\n");
+
+    hsa_default_float_rounding_mode_t rounding_mode;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &rounding_mode);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's default rounding mode.\n");
+
+    // Distinct message so a failure of this query is not mistaken for a
+    // failure of the default rounding mode query above.
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES, &rounding_mode);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's base profile default rounding modes.\n");
+
+    bool fast_f16_operation;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION, &fast_f16_operation);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's f16 HSAIL operation flag.\n");
+
+    uint32_t qmax = 0;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &qmax);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get maximum number of queues supported by the agent.\n");
+
+    uint32_t max = 0;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's queue max size.\n");
+    // Verify if the max size is power of 2 (only meaningful when the agent
+    // supports at least one queue)
+    if (qmax > 0) {
+      ASSERT_MSG(max && (!(max & (max - 1))), "Max queue size is not power of 2!\n");
+    }
+
+    uint32_t min = 0;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &min);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's queue min size.\n");
+
+    hsa_queue_type_t queue_type;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's queue type.\n");
+    ASSERT_MSG(HSA_QUEUE_TYPE_SINGLE == queue_type || HSA_QUEUE_TYPE_MULTI == queue_type, "Neither queue type is supported.\n");
+
+    uint32_t node;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &node);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's node info.\n");
+
+    hsa_device_type_t device_type;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's device type.\n");
+    ASSERT_MSG(HSA_DEVICE_TYPE_CPU == device_type || HSA_DEVICE_TYPE_GPU == device_type || HSA_DEVICE_TYPE_DSP == device_type, "No device type is supported.\n");
+
+    uint32_t cache_size[4];
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, cache_size);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's cache size.\n");
+
+    hsa_isa_t isa;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's instruction set architecture.\n");
+
+    uint8_t extensions[128];
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_EXTENSIONS, &extensions);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's extensions mask.\n");
+
+    uint16_t major_version;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_VERSION_MAJOR, &major_version);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's major version number.\n");
+
+    uint16_t minor_version;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_VERSION_MINOR, &minor_version);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent's minor version number.\n");
+
+    // Attributes that are only supported by agents that support kernel dispatch.
+    // HSA_AGENT_INFO_FEATURE is a bit-mask of hsa_agent_feature_t values, so
+    // test the kernel-dispatch bit; an equality comparison would skip agents
+    // that report more than one feature.
+    if (agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) {
+      uint32_t size = 0;
+      // Verify wavefront size
+      status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &size);
+      ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agnet's wavefront size.\n");
+
+      // Verify if the size is power of 2 and in the range of [1, 256]
+      ASSERT_MSG(size && (!(size & (size-1))) && size <= 256, "Size of the agent's wavefront is not correct.\n");
+
+      size = 0;
+      status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &size);
+      ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's workgroup max size.\n");
+
+      // The workgroup max size can't be 0, verify the value of size
+      ASSERT_MSG(size>0, "Faild to get a correct workgroup max size.\n");
+
+      uint16_t max_dim[3];
+      status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM, max_dim);
+      ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's workgroup max dim.\n");
+
+      // Verify each maximum is greater than 0 and not greater than workgroup max size
+      ASSERT_MSG(max_dim[0] > 0 &&
+                 max_dim[0] <= size &&
+                 max_dim[1] > 0 &&
+                 max_dim[1] <= size &&
+                 max_dim[2] > 0 &&
+                 max_dim[2] <= size,
+                 "Value of max_dim is not correct.\n");
+
+      uint32_t grid_size = 0;
+      status = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &grid_size);
+      ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's grid max size.\n");
+
+      // Grid max size must not be smaller than workgroup max size
+      ASSERT_MSG(grid_size >= size, "The value for grid and workgroup size is not correct!\n");
+
+      hsa_dim3_t hsa_dim;
+      status = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &hsa_dim);
+      ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's grid max dim.\n");
+
+      // Verify each maximum is greater than 0, no greater than grid max size,
+      // and no smaller than corresponding workgroup max dim
+      ASSERT_MSG(hsa_dim.x >0 &&
+                 hsa_dim.x <= grid_size &&
+                 hsa_dim.x >= max_dim[0] &&
+                 hsa_dim.y >0 &&
+                 hsa_dim.y <= grid_size &&
+                 hsa_dim.y >= max_dim[1] &&
+                 hsa_dim.z >0 &&
+                 hsa_dim.z <= grid_size &&
+                 hsa_dim.z >= max_dim[2],
+                 "The value of grid max dim is not correct.\n");
+
+      uint32_t fbarrier_size = 0;
+      status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &fbarrier_size);
+      ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the agent's max fbarrier size.\n");
+      ASSERT_MSG(fbarrier_size >= 32, "FBARRIER size must be at least 32!\n");
+    }
+  }
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  free_agent_list(&agent_list);
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #2
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_agent_get_info_not_initialized() {
+  hsa_status_t status;
+  char name[64];
+
+  // The runtime is deliberately not initialized; the handle is arbitrary.
+  hsa_agent_t invalid_agent;
+  invalid_agent.handle = 10;
+  status = hsa_agent_get_info(invalid_agent, HSA_AGENT_INFO_NAME, name);
+  ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_agent_get_info API didn't return HSA_STATUS_ERROR_NOT_INITIALIZED when it was called with an un-initialized runtime.\n");
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #3
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_agent_get_info_invalid_agent() {
+  hsa_status_t status;
+
+  status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  // An agent whose handle is 0 serves as the invalid agent.
+  char name[64];
+  hsa_agent_t invalid_agent;
+  invalid_agent.handle = 0;
+  status = hsa_agent_get_info(invalid_agent, HSA_AGENT_INFO_NAME, name);
+  ASSERT_MSG(HSA_STATUS_ERROR_INVALID_AGENT == status, "The hsa_agent_info API didn't return HSA_STATUS_ERROR_INVALID_AGENT when it was called with an invalid agent.\n");
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_agent_get_info_invalid_attribute() {
+  hsa_status_t status;
+
+  status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  struct agent_list_s agent_list;
+  get_agent_list(&agent_list);
+  // agents[0] is used below, so at least one agent must have been reported.
+  ASSERT(agent_list.num_agents > 0);
+
+  char pt[128];
+
+  // Pass in an invalid attribute value
+  status = hsa_agent_get_info(agent_list.agents[0], -1, (void*) &pt);
+  ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_agent_info API didn't return HSA_STATUS_ERROR_INVALID_ARGUMENT when it was called with an invalid attribute.\n");
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  free_agent_list(&agent_list);
+
+  return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #5
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_agent_get_info_invalid_ptr() {
+  hsa_status_t status;
+
+  status = hsa_init();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  struct agent_list_s agent_list;
+  get_agent_list(&agent_list);
+  // agents[0] is used below, so at least one agent must have been reported.
+  ASSERT(agent_list.num_agents > 0);
+
+  status = hsa_agent_get_info(agent_list.agents[0], HSA_AGENT_INFO_NAME, NULL);
+  ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_agent_info API didn't return HSA_STATUS_ERROR_INVALID_ARGUMENT when it was called with a NULL variable.\n");
+
+  status = hsa_shut_down();
+  ASSERT(HSA_STATUS_SUCCESS == status);
+
+  free_agent_list(&agent_list);
+
+  return 0;
+}
diff --git a/src/core/api/test_hsa_agent_iterate_regions.c b/src/core/api/test_hsa_agent_iterate_regions.c
new file mode 100644
index
0000000..eb6be60 --- /dev/null +++ b/src/core/api/test_hsa_agent_iterate_regions.c @@ -0,0 +1,202 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include "test_helper_func.h" + +/** + * + * Test Name: hsa_agent_iterate_regions + * + * Purpose: + * Verify that the API works as expected + * + * Description: + * + * 1) After init HsaRt, get the list of agents and call hsa_agent_iterate_regions on a valid agent + * Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Before init HsaRt, call hsa_agent_iterate_regions, and check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED + * + * 3) Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT when the callback passed is NULL + * + * 4) Check if the return value is HSA_STATUS_ERROR_INVALID_AGENT when passing an invalid HSA agent + * + */ + +/** + * + * @Brief: + * Implement Description #1: iterate the regions of the first agent and expect HSA_STATUS_SUCCESS. + * + * @Return: + * int + * + */ + +int test_hsa_agent_iterate_regions() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + int number = 0; + status = hsa_iterate_agents(callback_get_num_agents, &number); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(number > 0); // a zero-length ptr_list is undefined and ptr_list[0] is read below + hsa_agent_t ptr_list[number]; + hsa_agent_t* ptr_arg = ptr_list; + + status = hsa_iterate_agents(callback_get_agents, &ptr_arg); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Work with the first agent + hsa_agent_t agent = ptr_list[0]; + + // Get the total number of regions for the agent + status = hsa_agent_iterate_regions(agent, callback_get_num_regions, &number); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The hsa_agent_iterate_regions failed."); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2: call the API before hsa_init() and expect HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * @Return: + * int + * + */ + +int 
test_hsa_agent_iterate_regions_not_initialized() { + hsa_status_t status; + int number = 0; + + hsa_agent_t invalid_agent; + invalid_agent.handle = 0; + status = hsa_agent_iterate_regions(invalid_agent, callback_get_num_regions, &number); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_agent_iterate_regions API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when called before the runtime was initialized.\n"); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #3: pass a NULL callback for a valid agent and expect HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + * @Return: + * int + * + */ + +int test_hsa_agent_iterate_regions_invalid_argument() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + int number = 0; + status = hsa_iterate_agents(callback_get_num_agents, &number); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(number > 0); // a zero-length ptr_list is undefined and ptr_list[0] is read below + hsa_agent_t ptr_list[number]; + hsa_agent_t* ptr_arg = ptr_list; + + status = hsa_iterate_agents(callback_get_agents, &ptr_arg); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Work with the first agent + hsa_agent_t agent = ptr_list[0]; + + // Iterate with a NULL callback; the runtime must reject the call + void *callback_null = NULL; + status = hsa_agent_iterate_regions(agent, callback_null, NULL); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "Failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when passed NULL callback function.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #4: pass an agent whose handle is 0 and expect HSA_STATUS_ERROR_INVALID_AGENT. + * + * @Return: + * int + * + */ + +int test_hsa_agent_iterate_regions_invalid_agent() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + int number = 0; + + hsa_agent_t invalid_agent; + + // Most likely not a valid Agent handle + invalid_agent.handle = 0; + + status = hsa_agent_iterate_regions(invalid_agent, callback_get_num_regions, &number); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_AGENT == status, "The hsa_agent_iterate_regions API failed to return 
HSA_STATUS_ERROR_INVALID_AGENT when called with an invalid agent.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_code_object_get_info.c b/src/core/api/test_hsa_code_object_get_info.c new file mode 100644 index 0000000..bb116a9 --- /dev/null +++ b/src/core/api/test_hsa_code_object_get_info.c @@ -0,0 +1,183 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: hsa_code_object_get_info + * Scope: Conformance + * + * Purpose: Query a code object for all infos. + * + * Test Description: + * 1. Find an agent that supports kernel dispatch. + * 2. Create a code object by finalizing the no_op kernel. + * 3. Query all the infos on the code object: + * 1) version + * 2) type + * 3) ISA + * 4) machine model + * 5) profile + * 6) default float rounding mode + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_code_object_get_info() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization function pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // No finalizer available: balance hsa_init() and let the test succeed. 
+ if(HSA_STATUS_SUCCESS != status) { + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; + } + + // get the kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + hsa_machine_model_t agent_machine_model; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_MACHINE_MODEL, &agent_machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_profile_t agent_profile; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_default_float_rounding_mode_t agent_default_float_rounding_mode; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &agent_default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + // Create the program + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(agent_machine_model, agent_profile, agent_default_float_rounding_mode, NULL, &program); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the brig modules to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the program and extract the code object + hsa_code_object_t code_object; + memset(&code_object, 0, sizeof(hsa_code_object_t)); + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + status = pfn.hsa_ext_program_finalize(program, agent_isa, 0, control_directives, NULL, HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + // get code object infos + char version[64]; + hsa_code_object_type_t type; + hsa_isa_t isa; + hsa_machine_model_t machine_model; + hsa_profile_t profile; + hsa_default_float_rounding_mode_t float_rounding_mode; + int i; + + status = hsa_code_object_get_info(code_object, + HSA_CODE_OBJECT_INFO_VERSION, version); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code object's version.\n"); + int version_length = strlen(version); + for (i = version_length; i < 64; ++i) { + if (0 != version[i]) { + ASSERT_MSG(0, "The code object's version string must be padded with NULLs.\n"); + } + } + + status = hsa_code_object_get_info(code_object, + HSA_CODE_OBJECT_INFO_TYPE, &type); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code object's type.\n"); + + status = hsa_code_object_get_info(code_object, + HSA_CODE_OBJECT_INFO_ISA, &isa); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code object's ISA.\n"); + + status = hsa_code_object_get_info(code_object, + HSA_CODE_OBJECT_INFO_MACHINE_MODEL, &machine_model); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code object's machine model.\n"); + + status = hsa_code_object_get_info(code_object, + HSA_CODE_OBJECT_INFO_PROFILE, &profile); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code object's profile.\n"); + + status = hsa_code_object_get_info(code_object, + HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &float_rounding_mode); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code object's default float rounding mode.\n"); + + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_code_symbol_get_info.c b/src/core/api/test_hsa_code_symbol_get_info.c new file 
mode 100644 index 0000000..4ffc74b --- /dev/null +++ b/src/core/api/test_hsa_code_symbol_get_info.c @@ -0,0 +1,223 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: hsa_code_symbol_get_info + * Scope: Conformance + * + * Purpose: Iterate through all code symbols, and + * apply hsa_code_symbol_get_info() on each code symbol object within the + * iteration callback. + * + * Test Description: + * 1. Find an agent that supports kernel dispatch. + * 2. Create a code object by finalizing the vector_copy kernel. + * 3. Iterate through all code symbols by + * calling hsa_code_object_iterate_symbols(). + * 4. Within the callback of the iteration, query all symbol infos. + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +hsa_status_t callback_iterate_symbols(hsa_code_object_t code_object, + hsa_code_symbol_t code_symbol, + void* data) { + hsa_status_t status; + hsa_symbol_kind_t type; + uint32_t name_length; + uint32_t module_name_length; + hsa_symbol_linkage_t linkage; + hsa_variable_allocation_t variable_allocation; + hsa_variable_segment_t variable_segment; + uint32_t variable_alignment; + uint32_t variable_size; + bool variable_is_const; + uint32_t kernel_kernarg_segment_size; + uint32_t kernel_kernarg_segment_alignment; + uint32_t kernel_group_segment_size; + uint32_t kernel_private_segment_size; + bool kernel_dynamic_callstack; + uint32_t indirect_function_call_convention; + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_TYPE, &type); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's type.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_NAME_LENGTH, &name_length); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's name length.\n"); + ASSERT(name_length > 0); + + char 
name[name_length + 1]; + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_NAME, &name); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's name.\n"); + name[name_length] = '\0'; + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH, &module_name_length); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's module name length.\n"); + + char module_name[module_name_length + 1]; + if (module_name_length > 0) { + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_MODULE_NAME, &module_name); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's module name.\n"); + module_name[module_name_length] = '\0'; + } + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_LINKAGE, &linkage); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's linkage.\n"); + + switch (type) { + case HSA_SYMBOL_KIND_VARIABLE: + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION, &variable_allocation); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable allocation.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT, &variable_segment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable segment.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT, &variable_alignment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable alignment.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE, &variable_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable size.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST, &variable_is_const); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable is 
const.\n"); + break; + + case HSA_SYMBOL_KIND_KERNEL: + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernel_kernarg_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernel kernarg segment size.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &kernel_kernarg_segment_alignment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernarg segment alignment.\n"); + ASSERT(kernel_kernarg_segment_alignment <= 16); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &kernel_group_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernel group segment size.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kernel_private_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernel private segment size.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK, &kernel_dynamic_callstack); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernel dynamic callstack.\n"); + break; + + case HSA_SYMBOL_KIND_INDIRECT_FUNCTION: + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION, &indirect_function_call_convention); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's indirect function call convention.\n"); + break; + default: + break; + } + + return HSA_STATUS_SUCCESS; +} + +int test_hsa_code_symbol_get_info() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load a valid brig module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("vector_copy.brig", &module)); + + // Get the kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status 
= hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get machine model and profile to create a program + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_profile_t profile; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the module and get the code object & executable + hsa_code_object_t code_object; + hsa_executable_t executable; + hsa_code_object_type_t code_object_type = HSA_CODE_OBJECT_TYPE_PROGRAM; + int32_t call_convention = 0; + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + status = finalize_executable(agent, + 1, + &module, + machine_model, + profile, + default_float_rounding_mode, + code_object_type, + call_convention, + control_directives, + &code_object, + &executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_code_object_iterate_symbols(code_object, + callback_iterate_symbols, NULL); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_executable_create.c b/src/core/api/test_hsa_executable_create.c new file mode 100644 index 0000000..578b0e6 --- /dev/null +++ b/src/core/api/test_hsa_executable_create.c @@ -0,0 +1,184 @@ +/* + * ============================================================================= + * HSA 
Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/** + * Test Name: hsa_executable_create + * Scope: Conformance + * + * Purpose: create HSA executables with various parameter settings. + * + * Test Description: + * 1. Create an executable without initializing the HSA runtime. + * 2. Initialize the HSA runtime, then create executables with correct settings. + * 3. Create an executable with an invalid argument: a NULL exe_ptr. + * 4. Create executables until out-of-resource. + * + */ + + +#include +#include +#include +#include + + + +int test_hsa_executable_create() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // create executables with correct settings + hsa_executable_t exe[4]; + status = hsa_executable_create( + HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, + exe); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_create( + HSA_PROFILE_BASE, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, + exe + 1); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_create( + HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_FROZEN, + NULL, + exe + 2); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_create( + HSA_PROFILE_BASE, + HSA_EXECUTABLE_STATE_FROZEN, + NULL, + exe + 3); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_executable_destroy(exe[0]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_destroy(exe[1]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_destroy(exe[2]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_destroy(exe[3]); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +// create an executable without initializing the HSA runtime +int test_hsa_executable_create_not_initialized() { + hsa_status_t status; + hsa_executable_t exe; + + status = hsa_executable_create( + HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, + &exe); + 
ASSERT(HSA_STATUS_ERROR_NOT_INITIALIZED == status); + + return 0; +} + +// create an executable with invalid arguments: null exe_ptr +int test_hsa_executable_create_invalid_argument() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_executable_create( + HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, + NULL); + ASSERT(HSA_STATUS_ERROR_INVALID_ARGUMENT == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} + +// create executables until out-of-resource +int test_hsa_executable_create_out_of_resources() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // may need a larger "executables_max" (65k x 1k) to reach an OOR state. + const uint32_t executables_max = 65536; + hsa_executable_t* exe_array = (hsa_executable_t*)malloc(sizeof(hsa_executable_t) * executables_max); + ASSERT(NULL != exe_array); // malloc can fail; never pass a NULL base to hsa_executable_create + uint32_t executable_count = 0; + uint32_t i; + for (i = 0; i < executables_max; ++i) { + status = hsa_executable_create( + HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, + exe_array + i); + if (HSA_STATUS_SUCCESS == status) { + ++executable_count; + } else if (HSA_STATUS_ERROR_OUT_OF_RESOURCES == status) { + break; + } else { + // unexpected error + ASSERT(0); + } + } + for (i = 0; i < executable_count; ++i) { + status = hsa_executable_destroy(exe_array[i]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + free(exe_array); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} diff --git a/src/core/api/test_hsa_executable_destroy.c b/src/core/api/test_hsa_executable_destroy.c new file mode 100644 index 0000000..cfcd45c --- /dev/null +++ b/src/core/api/test_hsa_executable_destroy.c @@ -0,0 +1,151 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/** + * Test Name: hsa_executable_destroy + * Scope: Conformance + * + * Purpose: destroy HSA executables with various parameter settings. + * + * Test Description: + * 1. Initialize the HSA runtime, then create an executable with correct setting + * so that we have a valid executable. + * 2. Shutdown HSA runtime, then destroy the executable. Check the return error + * code (NOT_INITIALIZED). + * 3. Initialize the HSA runtime again, then create an executable with correct + * setting. + * 4. Destroy the executable, no error should occur. + * 5. Destroy the executable again, expect to receive an error code + * INVALID_EXECUTABLE. + * 6. Set the executable's handle to an invalid value, i.e., (uint64_t)-1. Then + * destroy the executable, and expect to receive an error code + * INVALID_EXECUTABLE. + * 7. Shutdown the HSA runtime and finish. + * + */ + +#include +#include +#include + +int test_hsa_executable_destroy() { // success path: create one executable, destroy it once, shut down; returns 0 + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // create an executable with correct setting + hsa_executable_t exe; + status = hsa_executable_create( + HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, + &exe); + ASSERT(HSA_STATUS_SUCCESS == status); + + // destroy the executable, no error should occur + status = hsa_executable_destroy(exe); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} + +int test_hsa_executable_destroy_not_initialized() { // destroy after hsa_shut_down() must yield HSA_STATUS_ERROR_NOT_INITIALIZED; returns 0 + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // create an executable with correct setting + hsa_executable_t exe; + status = hsa_executable_create( + HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, + &exe); + ASSERT(HSA_STATUS_SUCCESS == status); + + // shutdown HSA runtime and then destroy the executable + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_destroy(exe); + 
ASSERT(HSA_STATUS_ERROR_NOT_INITIALIZED == status); + return 0; +} + +int test_hsa_executable_destroy_invalid_executable() { // an already-destroyed handle and a handle of (uint64_t)-1 must both yield HSA_STATUS_ERROR_INVALID_EXECUTABLE; returns 0 + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // create an executable with correct setting + hsa_executable_t exe; + status = hsa_executable_create( + HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, + &exe); + ASSERT(HSA_STATUS_SUCCESS == status); + + // destroy the executable, no error should occur + status = hsa_executable_destroy(exe); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable again, expect to receive an error code + // INVALID_EXECUTABLE + status = hsa_executable_destroy(exe); + ASSERT(HSA_STATUS_ERROR_INVALID_EXECUTABLE == status); + + // Set the executable's handle to an invalid value, then + // destroy the executable + exe.handle = (uint64_t)-1; + status = hsa_executable_destroy(exe); + ASSERT(HSA_STATUS_ERROR_INVALID_EXECUTABLE == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} + diff --git a/src/core/api/test_hsa_executable_get_info.c b/src/core/api/test_hsa_executable_get_info.c new file mode 100644 index 0000000..b7d5d3d --- /dev/null +++ b/src/core/api/test_hsa_executable_get_info.c @@ -0,0 +1,103 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: hsa_executable_get_info + * Scope: Conformance + * + * Purpose: get infos from an executable. + * + * Test Description: + * 1. Initialize HSA runtime, then properly create an executable. + * 2. Query executable infos. 
+ * + */ + +#include +#include +#include + +int test_hsa_executable_get_info() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_profile_t profiles[2] = { + HSA_PROFILE_BASE, + HSA_PROFILE_FULL}; + hsa_executable_state_t states[2] = { + HSA_EXECUTABLE_STATE_UNFROZEN, + HSA_EXECUTABLE_STATE_FROZEN}; + int i; + int j; + for (i = 0; i < 2; ++i) { + for (j = 0; j < 2; ++j) { + hsa_executable_t exe; + status = hsa_executable_create(profiles[i], states[j], NULL, &exe); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_profile_t profile; + status = hsa_executable_get_info(exe, + HSA_EXECUTABLE_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(profiles[i] == profile); + + hsa_executable_state_t state; + status = hsa_executable_get_info(exe, + HSA_EXECUTABLE_INFO_STATE, &state); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(states[j] == state); + + hsa_executable_destroy(exe); + ASSERT(HSA_STATUS_SUCCESS == status); + } + } + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} + + diff --git a/src/core/api/test_hsa_executable_load_code_object.c b/src/core/api/test_hsa_executable_load_code_object.c new file mode 100644 index 0000000..c2f5f15 --- /dev/null +++ b/src/core/api/test_hsa_executable_load_code_object.c @@ -0,0 +1,348 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: hsa_executable_load_code_object + * Scope: Conformance + * + * Purpose: load code object into HSA executables with various parameter + * settings. + * + * Test Description: + * 1. Without initializing the HSA runtime, declare a set of agent, executable, + * and code object. Load the code object into the executable, expect to + * receive NOT_INITIALIZED error. + * 2. 
Initialize HSA runtime, then properly create a set of agent, executable, + * and code object. + * 3. Load the code object into the executable. No error should occur at this + * point. + * 4. For each invalid agent, invalid executable, and invalid code object, call + * the hsa_executable_load_code_object() and pass these invalid object as + * argument. The error code should indicate which argument is invalid. + * 5. Freeze the valid executable object, do a load again with all correct + * arguments. Expect to receive FROZEN_EXECUTABLE error. + * 6. Destroy all the object, shutdown the HSA runtime, and finish. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +void load_module_finalize_program( + hsa_ext_finalizer_pfn_t* pfn, + hsa_ext_module_t* module_ptr, + hsa_code_object_t* code_object_ptr, + hsa_ext_program_t* program_ptr, + hsa_executable_t* exe_ptr) { + hsa_status_t status; + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + + status = hsa_iterate_agents(callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Load the BRIG module + ASSERT(0 == load_module_from_file("no_op.brig", module_ptr)); + + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_profile_t profile; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + // Create the program + memset(program_ptr, 0, 
sizeof(hsa_ext_program_t)); + status = pfn->hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, program_ptr); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the brig modules to the program + status = pfn->hsa_ext_program_add_module(*program_ptr, *module_ptr); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the program and extract the code object + memset(code_object_ptr, 0, sizeof(hsa_code_object_t)); + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + status = pfn->hsa_ext_program_finalize(*program_ptr, agent_isa, 0, control_directives, NULL, HSA_CODE_OBJECT_TYPE_PROGRAM, code_object_ptr); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_executable_create(profile, + HSA_EXECUTABLE_STATE_UNFROZEN, NULL, exe_ptr); + ASSERT(HSA_STATUS_SUCCESS == status); +} + +int test_hsa_executable_load_code_object() { + hsa_status_t status; + + // initialize the HSA runtime, and create all required objects for a + // successful load + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + hsa_ext_module_t module; + hsa_code_object_t code_object; + hsa_ext_program_t program; + hsa_executable_t exe; + load_module_finalize_program(&pfn, &module, &code_object, &program, &exe); + + // load the code object into this executable, no error should occur + status = hsa_executable_load_code_object(exe, agent, code_object, NULL); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_executable_destroy(exe); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} + +int test_hsa_executable_load_code_object_not_initialized() { + hsa_status_t status; + + // load code object without initializing the HSA runtime + hsa_agent_t invalid_agent; + hsa_executable_t invalid_exe; + hsa_code_object_t invalid_code_obj; + status = hsa_executable_load_code_object(invalid_exe, invalid_agent, invalid_code_obj, NULL); + ASSERT(HSA_STATUS_ERROR_NOT_INITIALIZED == status); + + return 0; +} + +int test_hsa_executable_load_code_object_invalid_executable() { + hsa_status_t status; + + // initialize the HSA runtime, and create all required objects + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + hsa_ext_module_t module; + hsa_code_object_t code_object; + hsa_ext_program_t program; + hsa_executable_t exe; + load_module_finalize_program(&pfn, &module, &code_object, &program, &exe); + + // load this valid code object into an invalid executable + hsa_executable_t invalid_exe; + invalid_exe.handle = (uint64_t)-1; + status = hsa_executable_load_code_object(invalid_exe, agent, code_object, NULL); + ASSERT(HSA_STATUS_ERROR_INVALID_EXECUTABLE == status); + + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} + +int test_hsa_executable_load_code_object_invalid_agent() { + hsa_status_t status; + + // initialize the HSA runtime, and create all required objects + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_ext_module_t module; + hsa_code_object_t code_object; + hsa_ext_program_t program; + hsa_executable_t exe; + load_module_finalize_program(&pfn, &module, &code_object, &program, &exe); + + // load the code object with an invalid agent + hsa_agent_t invalid_agent; + invalid_agent.handle = (uint64_t)-1; + status = hsa_executable_load_code_object(exe, invalid_agent, code_object, NULL); + ASSERT(HSA_STATUS_ERROR_INVALID_AGENT == status); + + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} + +int test_hsa_executable_load_code_object_invalid_code_object() { + hsa_status_t status; + + // initialize the HSA runtime, and create all required objects + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + hsa_profile_t profile; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_executable_t exe; + status = hsa_executable_create(profile, + HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &exe); + ASSERT(HSA_STATUS_SUCCESS == status); + + // load an invalid code object into the executable + hsa_code_object_t invalid_code_object; + invalid_code_object.handle = (uint64_t)-1; + status = hsa_executable_load_code_object(exe, agent, invalid_code_object, NULL); + ASSERT(HSA_STATUS_ERROR_INVALID_CODE_OBJECT == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} + +int test_hsa_executable_load_code_object_frozen_executable() { + hsa_status_t status; + + // initialize the HSA runtime, and create all required objects + 
status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. + if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + hsa_ext_module_t module; + hsa_code_object_t code_object; + hsa_ext_program_t program; + hsa_executable_t exe; + load_module_finalize_program(&pfn, &module, &code_object, &program, &exe); + + // load this valid code object into an invalid executable + status = hsa_executable_freeze(exe, NULL); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_load_code_object(exe, agent, code_object, NULL); + ASSERT(HSA_STATUS_ERROR_FROZEN_EXECUTABLE == status); + + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} diff --git a/src/core/api/test_hsa_executable_symbol_get_info.c b/src/core/api/test_hsa_executable_symbol_get_info.c new file mode 100644 index 0000000..7aeccc8 --- /dev/null +++ b/src/core/api/test_hsa_executable_symbol_get_info.c @@ -0,0 +1,229 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: hsa_executable_symbol_get_info + * Scope: Conformance + * + * Purpose: get infos from an executable symbol. + * + * Test Description: + * 1. Initialize HSA runtime, then properly create an executable. + * 2. Create a "vector_copy" code object. + * 3. Load the code object into the executable. + * 4. Query executable symbol infos. 
+ * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +hsa_status_t callback_executable_iterate_symbols(hsa_executable_t exe, hsa_executable_symbol_t exe_symbol, void* data) { + hsa_status_t status; + + // Get the executable status + hsa_executable_state_t exe_state; + status = hsa_executable_get_info(exe, HSA_EXECUTABLE_INFO_STATE, &exe_state); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_symbol_kind_t type; + uint32_t name_length; + char* name; + uint32_t module_name_length; + char* module_name; + hsa_agent_t agent; + uint64_t variable_address; + hsa_symbol_linkage_t linkage; + hsa_variable_allocation_t variable_allocation; + hsa_variable_segment_t variable_segment; + uint32_t variable_alignment; + uint32_t variable_size; + bool variable_is_const; + uint64_t kernel_object; + uint32_t kernel_kernarg_segment_size; + uint32_t kernel_kernarg_segment_alignment; + uint32_t kernel_group_segment_size; + uint32_t kernel_private_segment_size; + bool kernel_dynamic_callstack; + uint64_t indirect_function_object; + uint32_t indirect_function_call_convention; + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &type); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's type.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &name_length); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's name length.\n"); + ASSERT(name_length > 0); + + name = (char*)malloc(sizeof(char) * (name_length + 1)); + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &name); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's name.\n"); + name[name_length] = '\0'; + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH, &module_name_length); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's model 
name length.\n"); + + if (module_name_length > 0) { + module_name = (char*)malloc(sizeof(char) * (module_name_length + 1)); + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME, &module_name); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's module name.\n"); + module_name[module_name_length] = '\0'; + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE, &linkage); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's linkage.\n"); + + if (HSA_SYMBOL_KIND_VARIABLE == type) { + if (HSA_SYMBOL_KIND_VARIABLE == type && HSA_VARIABLE_ALLOCATION_AGENT == variable_allocation) { + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_AGENT, &agent); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's agent.\n"); + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &variable_address); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable address.\n"); + + if (HSA_EXECUTABLE_STATE_UNFROZEN == exe_state) { + ASSERT(0 == variable_address); + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION, &variable_allocation); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable allocation.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT, &variable_segment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable segment.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT, &variable_alignment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's vairable alignment.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, 
&variable_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable size.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST, &variable_is_const); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable is const.\n"); + } + + if (HSA_SYMBOL_KIND_KERNEL == type) { + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel_object); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel object.\n"); + if (HSA_EXECUTABLE_STATE_UNFROZEN == exe_state) { + ASSERT(0 == kernel_object); + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernel_kernarg_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel kernarg segment size.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &kernel_kernarg_segment_alignment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel kernarg segment alignment.\n"); + ASSERT(kernel_kernarg_segment_alignment <= 16); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &kernel_group_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel group segment size.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kernel_private_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel private segment size.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK, &kernel_dynamic_callstack); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel dynamic 
callstack.\n"); + } + + if (HSA_SYMBOL_KIND_INDIRECT_FUNCTION == type) { + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT, &indirect_function_object); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's indirect function object.\n"); + if (HSA_EXECUTABLE_STATE_UNFROZEN == exe_state) { + ASSERT(0 == indirect_function_object); + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION, &indirect_function_call_convention); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's indirect function call convention.\n"); + } +} + +int test_hsa_executable_symbol_get_info() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Finalize the "vector_copy" kernel + hsa_code_object_t code_object; + code_object.handle = (uint64_t)-1; + code_object = load_code_object(&agent, "vector_copy.brig", "&__vector_copy_kernel"); + ASSERT((uint64_t)-1 != code_object.handle); + + // Create the executable + hsa_profile_t profile; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_executable_t exe; + status = hsa_executable_create(profile, + HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &exe); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the code object into this executable, no error should occur + status = hsa_executable_load_code_object(exe, agent, code_object, NULL); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_executable_iterate_symbols(exe, callback_executable_iterate_symbols, NULL); + + status = hsa_executable_destroy(exe); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = 
hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_init.c b/src/core/api/test_hsa_init.c new file mode 100644 index 0000000..a2ca8d1 --- /dev/null +++ b/src/core/api/test_hsa_init.c @@ -0,0 +1,108 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include + +/** + * + * Test Name: hsa_init + * + * Purpose: Verify that API of hsa_init works as expected + * + * Description: + * + * 1) Open an new instance of HSA runtime; + * Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Keep opening instance of HSA runtime until it reaches MAX number + * Check if the return value is HSA_STATUS_ERROR_REFCOUNT_OVERFLOW. + * + */ + +/** + * + * @Brief: + * Simply open one instance of HSA runtime. + * + * @Return: + * int + * + */ + +int test_hsa_init() { + hsa_status_t status; + + status = hsa_init(); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The call to hsa_init() failed.\n"); + + return 0; +} + +/** + * + * @Brief: + * Simply open instances of HSA runtime as much as possible until it reaches MAX number. 
+ * + * @Return: + * int + * + */ + +int test_hsa_init_MAX() { + hsa_status_t status; + + int i; + for (i = 0; i < INT32_MAX; ++i) { + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_init(); + ASSERT_MSG(HSA_STATUS_ERROR_REFCOUNT_OVERFLOW == status, "The HSA_STATUS_ERROR_REFCOUNT_OVERFLOW error code wasn't returned on the INT32_MAX+1 call to hsa_init().\n"); + + return 0; +} diff --git a/src/core/api/test_hsa_isa_compatible.c b/src/core/api/test_hsa_isa_compatible.c new file mode 100644 index 0000000..824b88b --- /dev/null +++ b/src/core/api/test_hsa_isa_compatible.c @@ -0,0 +1,350 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: hsa_isa_compatible + * Scope: Conformance + * + * Purpose: Verify that the hsa_isa_compatible API works correctly. + * + * Test Description: + * + * 1) Create a code object and obtain the code objects isa. Query + * a valid dispatch agent for its isa. Use the hsa_isa_compatible + * API to query if the two isas are compatible. The call to hsa_isa_compatible + * should succeed. The two isa may or may not be compatible. + * + * 2) Attempt to call hsa_isa_compatible twice; first with an invalid + * code_object_isa and then with an invalid agent_isa. In both cases + * the API should return HSA_STATUS_ERROR_INVALID_ISA. + * + * 3) Call hsa_isa_compatible with a NULL result argument. The API + * should return HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_isa_compatible() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + // Get the kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_profile_t profile; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the program + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the brig modules to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the program and extract the code object + hsa_code_object_t code_object; + memset(&code_object, 0, sizeof(hsa_code_object_t)); + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + status = pfn.hsa_ext_program_finalize(program, agent_isa, 0, control_directives, NULL, HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the ISA 
from the code object + hsa_isa_t code_object_isa; + code_object_isa.handle = (uint64_t)-1; + status = hsa_code_object_get_info(code_object, + HSA_CODE_OBJECT_INFO_ISA, &code_object_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != code_object_isa.handle); + + // Check if these two ISAs are compatible + bool compatible; + status = hsa_isa_compatible(code_object_isa, agent_isa, &compatible); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_isa_compatible_invalid_isa() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + // Get the kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_profile_t profile; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the program + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the brig modules to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the program and extract the code object + hsa_code_object_t code_object; + memset(&code_object, 0, sizeof(hsa_code_object_t)); + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + status = pfn.hsa_ext_program_finalize(program, agent_isa, 0, control_directives, NULL, HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the ISA 
from the code object + hsa_isa_t code_object_isa; + code_object_isa.handle = (uint64_t)-1; + status = hsa_code_object_get_info(code_object, + HSA_CODE_OBJECT_INFO_ISA, &code_object_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != code_object_isa.handle); + + // Check that an invalid code object isa generates + // a HSA_STATUS_ERROR_INVALID_ISA error + bool compatible; + hsa_isa_t invalid_isa; + invalid_isa.handle = 0; + status = hsa_isa_compatible(invalid_isa, agent_isa, &compatible); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ISA == status, "The hsa_isa_compatible API did not return HSA_STATUS_ERROR_INVALID_ISA when passed an invalid object code isa."); + + // Check that an invalid agent isa generates + // a HSA_STATUS_ERROR_INVALID_ISA error + status = hsa_isa_compatible(code_object_isa, invalid_isa, &compatible); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ISA == status, "The hsa_isa_compatible API did not return HSA_STATUS_ERROR_INVALID_ISA when passed an invalid agent isa."); + + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_isa_compatible_null_result() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + // Get the kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_profile_t profile; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the program + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the brig modules to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the program and extract the code object + hsa_code_object_t code_object; + memset(&code_object, 0, sizeof(hsa_code_object_t)); + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + status = pfn.hsa_ext_program_finalize(program, agent_isa, 0, control_directives, NULL, HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the ISA 
from the code object + hsa_isa_t code_object_isa; + code_object_isa.handle = (uint64_t)-1; + status = hsa_code_object_get_info(code_object, + HSA_CODE_OBJECT_INFO_ISA, &code_object_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != code_object_isa.handle); + + // Check if these two ISAs are compatible + status = hsa_isa_compatible(code_object_isa, agent_isa, NULL); + ASSERT(HSA_STATUS_ERROR_INVALID_ARGUMENT == status); + + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_isa_from_name.c b/src/core/api/test_hsa_isa_from_name.c new file mode 100755 index 0000000..82d1485 --- /dev/null +++ b/src/core/api/test_hsa_isa_from_name.c @@ -0,0 +1,199 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: hsa_isa_from_name + * Scope: Conformance + * + * Purpose: Verify that the hsa_isa_from_name API works as + * expected. + * + * Description: + * + * 1) Query an agent for a supported isa and get the isa's name. + * Use that isa name in a call to hsa_isa_from_name to obtain + * another isa. Check that the agent's isa and the named isa + * are compatible, if not the same. + * + * 2) Call hsa_isa_from_name with a null name. 
Verify the API + * return HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + * 3) Call hsa_isa_from_name with a null isa. Verify the API + * return HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + * 4) Call hsa_isa_from_name with a invalid isa name. Verify the API + * return HSA_STATUS_ERROR_INVALID_ISA_NAME. + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_isa_from_name() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Get the name length of the agent's isa + size_t length = 0; + status = hsa_isa_get_info(agent_isa, HSA_ISA_INFO_NAME_LENGTH, 0, &length); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the name of the agent's isa + char name[length]; + status = hsa_isa_get_info(agent_isa, HSA_ISA_INFO_NAME, 0, &name); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Obtain another reference to the isa using the name + hsa_isa_t named_isa; + status = hsa_isa_from_name(name, &named_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Check if these two ISAs are compatible + bool compatible; + status = hsa_isa_compatible(named_isa, agent_isa, &compatible); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_isa_from_name_null_name() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Try to Obtain another reference to the isa using a NULL name + hsa_isa_t named_isa; + status = 
hsa_isa_from_name(NULL, &named_isa); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status,"The hsa_isa_from_name API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when called with a null name."); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_isa_from_name_null_isa() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Get the name length of the agent's isa + size_t length = 0; + status = hsa_isa_get_info(agent_isa, HSA_ISA_INFO_NAME_LENGTH, 0, &length); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the name of the agent's isa + char name[length]; + status = hsa_isa_get_info(agent_isa, HSA_ISA_INFO_NAME, 0, &name); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Try to obtain another reference to the isa using a NULL isa + status = hsa_isa_from_name(name, NULL); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status,"The hsa_isa_from_name API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when called with a null isa."); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_isa_from_name_invalid_isa_name() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Make up the name of the agent's isa + char* name = "Invalid:ISA:Name"; + + // Obtain another reference to the isa using the name + hsa_isa_t named_isa; + status = hsa_isa_from_name(name, &named_isa); + 
ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ISA_NAME == status,"The hsa_isa_from_name API failed to return HSA_STATUS_ERROR_INVALID_ISA_NAME when called with a invalid isa name."); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_isa_get_info.c b/src/core/api/test_hsa_isa_get_info.c new file mode 100644 index 0000000..1e65ef8 --- /dev/null +++ b/src/core/api/test_hsa_isa_get_info.c @@ -0,0 +1,258 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: hsa_isa_get_info + * Scope: Conformance + * + * Purpose: Verify that the hsa_isa_get_info works as expected. + * + * Test Description: + * + * 1) Query a valid isa for all supported ISA attributes. Verify + * that the api executes successfully and return valid values. + * + * 2) Verify that the hsa_isa_get_info API returns + * HSA_STATUS_ERROR_NOT_INITIALIZED if it is called before the + * runtime is initialized. + * + * 3) Verify that the hsa_isa_get_info API returns + * HSA_STATUS_ERROR_INVALID_ISA if the ISA is NULL. + * + * 4) Verify that the hsa_isa_get_info API returns + * HSA_STATUS_ERROR_INVALID_INDEX if index is out of range. + * + * 5) Verify that the hsa_isa_get_info API returns + * HSA_STATUS_ERROR_INVALID_ARGUMENT if the specified attribute + * is invalid. + * + * 6) Verify that the hsa_isa_get_info API returns + * HSA_STATUS_ERROR_INVALID_ARGUMENT if the value parameter + * is null. 
+ * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_isa_get_info() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from this agent + hsa_isa_t isa; + isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != isa.handle); + + // Query ISA's info + uint32_t name_length; + status = hsa_isa_get_info(isa, HSA_ISA_INFO_NAME_LENGTH, 0, &name_length); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get ISA's name length.\n"); + ASSERT_MSG(name_length > 0, "ISA's name length is zero.\n"); + + char name[name_length]; + status = hsa_isa_get_info(isa, HSA_ISA_INFO_NAME, 0, name); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get ISA's name.\n"); + + uint32_t call_conv_count = 0; + status = hsa_isa_get_info(isa, HSA_ISA_INFO_CALL_CONVENTION_COUNT, 0, &call_conv_count); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get ISA's call convention count.\n"); + + uint32_t i; + for (i = 0; i < call_conv_count; ++i) { + uint32_t wavefront_size; + status = hsa_isa_get_info(isa, + HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE, + i, + &wavefront_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get ISA's call convention wavefront size.\n"); + ASSERT_MSG(wavefront_size >= 1 && wavefront_size <= 256, "ISA's call convention wavefront size must be in the range of [1, 256].\n"); + ASSERT_MSG(!(wavefront_size & (wavefront_size-1)), "ISA's call convention wavefronts per compute unit must be a power of 2.\n"); + + uint32_t wavefronts_per_comp_unit; + status = hsa_isa_get_info(isa, + HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT, + i, + 
&wavefronts_per_comp_unit); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get ISA's call convention wavefronts per compute unit.\n"); + } + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_isa_get_info_not_initialized() { + hsa_status_t status; + + uint32_t name_length; + hsa_isa_t isa; + status = hsa_isa_get_info(isa, HSA_ISA_INFO_NAME_LENGTH, 0, &name_length); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_isa_get_info API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when called before the runtime was initialized.\n"); + + return 0; +} + +int test_hsa_isa_get_info_invalid_isa() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Query with a null ISA. + uint32_t value; + hsa_isa_t invalid_isa; + invalid_isa.handle = 0; + status = hsa_isa_get_info(invalid_isa, HSA_ISA_INFO_NAME_LENGTH, 0, &value); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ISA == status, "The hsa_isa_get_info API failed to return HSA_STATUS_ERROR_INVALID_ISA when it was called with an invalid isa.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_isa_get_info_index_out_of_range() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from this agent + hsa_isa_t isa; + isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != isa.handle); + + // Query the isa with a index greater than the call convention count. 
+ uint32_t value; + status = hsa_isa_get_info(isa, HSA_ISA_INFO_NAME_LENGTH, -1, &value); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_INDEX == status, "The hsa_isa_get_info API failed to return HSA_STATUS_ERROR_INVALID_INDEX when it was called with an invalid attribute.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_isa_get_info_invalid_attribute() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from this agent + hsa_isa_t isa; + isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != isa.handle); + + // Query ISA's info + uint32_t value; + status = hsa_isa_get_info(isa, -1, 0, &value); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_isa_get_info API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when it was called with an invalid attribute.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_isa_get_info_invalid_null_value() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a kernel dispatch agent + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents( + callback_get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from this agent + hsa_isa_t isa; + isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != isa.handle); + + // Query ISA's info + status = hsa_isa_get_info(isa, HSA_ISA_INFO_NAME_LENGTH, 0, NULL); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == 
status, "The hsa_isa_get_info API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when it was called with an invalid attribute.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_iterate_agents.c b/src/core/api/test_hsa_iterate_agents.c new file mode 100644 index 0000000..d7ac7b7 --- /dev/null +++ b/src/core/api/test_hsa_iterate_agents.c @@ -0,0 +1,139 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include "test_helper_func.h" + +/** + * + * Test Name: hsa_iterate_agents + * + * Purpose: + * Verify that if the API works as expected + * + * Description: + * + * 1) After init HsaRt, call hsa_iterate_agents API with the callback of callback_test + * Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Before init HsaRt, call hsa_iterate_agents, and check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED + * + * 3) Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT when passing NULL to callback + */ + +hsa_status_t callback_test(hsa_agent_t agent, void* data) { + return HSA_STATUS_SUCCESS; +} + +/** + * + * @Brief: + * Implement Description #1 + * + * @Return: + * int + * + */ + +int test_hsa_iterate_agents() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_iterate_agents(callback_test, NULL); + + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The hsa_iterate_agents API failed.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return: + * int + * + */ + +int test_hsa_iterate_agents_not_initialized() { + hsa_status_t status; + + int number = 0; + status = hsa_iterate_agents(callback_test, &number); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_iterate_agents API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when called before initializing the 
runtime.\n"); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #3 + * + * @Return: + * int + * + */ + +int test_hsa_iterate_agents_invalid_callback() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_iterate_agents(NULL, NULL); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_iterate_agents API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when called with a NULL callback function.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_memory_allocate.c b/src/core/api/test_hsa_memory_allocate.c new file mode 100644 index 0000000..7d690b8 --- /dev/null +++ b/src/core/api/test_hsa_memory_allocate.c @@ -0,0 +1,334 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include "test_helper_func.h" + +/** + * + * Test Name: hsa_memory_allocate + * + * Purpose: + * Verify that if the API works as expected + * + * Description: + * + * 1) Get the list of agents and their valid regions + * Call the hsa_memory_allocate API with the valid region and appropriate size. + * Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Before initializing the HSA runtime, call hsa_memory_allocate, + * and check that the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * 3) Check that the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT when + * passing a NULL value as the base address parameter. + * + * 4) Check that the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT when + * passing a 0 for the size parameter. + * + * 5) Check that the return value is HSA_STATUS_ERROR_INVALID_ALLOCATION when + * passing a size argument greater than the size of region. + * + * 6) Check if the return value is HSA_STATUS_ERROR_INVALID_REGION when passing region 0. 
+ *
+ */
+
+/**
+ * @Brief:
+ * Implement Description #1 (allocate from the first region of the first agent)
+ *
+ * @Return:
+ * int - 0 when the end of the test body is reached
+ */
+
+int test_hsa_memory_allocate() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Work with the first agent
+    ASSERT(0 < agent_list.num_agents);
+    hsa_agent_t agent = agent_list.agents[0];
+
+    // Get the total number of regions for the agent; at least one is needed below
+    int num_regions = 0;
+    status = hsa_agent_iterate_regions(agent, callback_get_num_regions, &num_regions);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT(0 < num_regions);
+
+    // Collect the agent's regions (a zero-length array would be invalid, hence the check above)
+    hsa_region_t region_list[num_regions];
+    hsa_region_t *ptr_reg = region_list;
+    status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_reg);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_region_t region = region_list[0];
+
+    // Query the region's maximum allocation size; only the call's status is checked
+    size_t size_r = 0;
+    status = hsa_region_get_info(region, HSA_REGION_INFO_ALLOC_MAX_SIZE, (void *)&size_r);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    size_t size = 1024;
+    void *addr = NULL;
+
+    status = hsa_memory_allocate(region, size, &addr);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The hsa_memory_allocate API failed to correctly allocate memory.\n");
+
+    // Release the allocation so the test does not leak it
+    status = hsa_memory_free(addr);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
+
+/**
+ * @Brief:
+ * Implement Description #2 (hsa_init is never called in this test)
+ *
+ * @Return:
+ * int - 0 when the end of the test body is reached
+ */
+
+int test_hsa_memory_allocate_not_initialized() {
+    hsa_status_t status;
+    void *addr = 0;
+
+    hsa_region_t invalid_region;
+    invalid_region.handle = 0;
+    status = hsa_memory_allocate(invalid_region, 10, &addr);
+    ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_memory_allocate API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when called before the runtime was initialized.\n");
+
+    return 0;
+}
+
+/**
+ *
+ *@Brief:
+ *Implement Description #3
+
*
+ * @Return:
+ * int - 0 when the end of the test body is reached
+ *
+ */
+
+int test_hsa_memory_allocate_null_ptr() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Work with the first agent
+    ASSERT(0 < agent_list.num_agents);
+    hsa_agent_t agent = agent_list.agents[0];
+
+    // Get the total number of regions for the agent; at least one is needed below
+    int num_regions = 0;
+    status = hsa_agent_iterate_regions(agent, callback_get_num_regions, &num_regions);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT(0 < num_regions);
+
+    // Collect the agent's regions (a zero-length array would be invalid, hence the check above)
+    hsa_region_t region_list[num_regions];
+    hsa_region_t *ptr_reg = region_list;
+    status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_reg);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_region_t region = region_list[0];
+
+    status = hsa_memory_allocate(region, 1024, NULL);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_memory_allocate API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when called with a NULL pointer.\n");
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4 (a size of 0 on a valid region)
+ *
+ * @Return:
+ * int - 0 when the end of the test body is reached
+ *
+ */
+
+int test_hsa_memory_allocate_zero_size() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Work with the first agent
+    ASSERT(0 < agent_list.num_agents);
+    hsa_agent_t agent = agent_list.agents[0];
+
+    // Get the total number of regions for the agent; at least one is needed below
+    int num_regions = 0;
+    status = hsa_agent_iterate_regions(agent, callback_get_num_regions, &num_regions);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT(0 < num_regions);
+
+    // Collect the agent's regions (a zero-length array would be invalid, hence the check above)
+    hsa_region_t region_list[num_regions];
+    hsa_region_t *ptr_reg = region_list;
+    status = hsa_agent_iterate_regions(agent,
callback_get_regions, &ptr_reg);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_region_t region = region_list[0];
+
+    void *addr = 0;
+    status = hsa_memory_allocate(region, 0, &addr);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_memory_allocate API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when called with a size of 0.\n");
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #5 (request 10 bytes more than the region's reported maximum)
+ *
+ * @Return:
+ * int - 0 when the end of the test body is reached
+ *
+ */
+
+int test_hsa_memory_allocate_invalid_allocation() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Work with the first agent
+    ASSERT(0 < agent_list.num_agents);
+    hsa_agent_t agent = agent_list.agents[0];
+
+    // Get the total number of regions for the agent; at least one is needed below
+    int num_regions = 0;
+    status = hsa_agent_iterate_regions(agent, callback_get_num_regions, &num_regions);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT(0 < num_regions);
+
+    // Collect the agent's regions (a zero-length array would be invalid, hence the check above)
+    hsa_region_t region_list[num_regions];
+    hsa_region_t *ptr_reg = region_list;
+    status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_reg);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_region_t region = region_list[0];
+
+    // Get the region's maximum allocation size, then ask for slightly more than that
+    size_t size_r = 0;
+    status = hsa_region_get_info(region, HSA_REGION_INFO_ALLOC_MAX_SIZE, (void *)&size_r);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    size_t size = size_r+10;
+    void *addr = 0;
+
+    status = hsa_memory_allocate(region, size, &addr);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ALLOCATION == status, "The hsa_memory_allocate API failed to return HSA_STATUS_ERROR_INVALID_ALLOCATION when called with an invalid size.\n");
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return
0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #6 (a region whose handle is set to 0)
+ *
+ * @Return:
+ * int - 0 when the end of the test body is reached
+ *
+ */
+
+int test_hsa_memory_allocate_invalid_region() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // addr is only passed by address below; its initial value is never read in this function.
+    void *addr;
+    hsa_region_t invalid_region;
+    invalid_region.handle = 0;
+    status = hsa_memory_allocate(invalid_region, 1024, &addr);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_REGION == status, "The hsa_memory_allocate API failed to return HSA_STATUS_ERROR_INVALID_REGION when passed an invalid region.\n");
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_memory_deregister.c b/src/core/api/test_hsa_memory_deregister.c
new file mode 100644
index 0000000..73e88c5
--- /dev/null
+++ b/src/core/api/test_hsa_memory_deregister.c
@@ -0,0 +1,123 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +/** + * + * Test Name: hsa_memory_deregister + * + * Purpose: + * Verify that if the API works as expected + * + * Description: + * + * 1) Register a block of memory using hsa_memory_register + * and call hsa_memory_deregister; check if the return value is HSA_STATUS_SUCCESS. 
+ *
+ * 2) Before the runtime is initialized call hsa_memory_deregister and check if the return
+ * value is HSA_STATUS_ERROR_NOT_INITIALIZED
+ *
+ */
+
+/**
+ * @Brief:
+ * Implement Description #1 (register a malloc'ed buffer, then deregister it)
+ *
+ * @Return:
+ * int - 0 when the end of the test body is reached
+ */
+
+int test_hsa_memory_deregister() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    int size = 1024;
+    void *addr = 0;
+
+    // Get a block of memory from the OS allocator and make sure the allocation itself succeeded
+    addr = (void*) malloc(sizeof(char) * size);
+    ASSERT(NULL != addr);
+
+    // Registering the allocated memory
+    status = hsa_memory_register(addr, size);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_memory_deregister(addr, size);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The hsa_memory_deregister API failed to properly deregister memory.");
+
+    free(addr);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #2 (hsa_init is never called in this test)
+*
+* @Return:
+* int - 0 when the end of the test body is reached
+*
+*/
+
+int test_hsa_memory_deregister_not_initialized() {
+    hsa_status_t status;
+    void *addr = NULL;
+
+    status = hsa_memory_deregister(addr, 0);
+    ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_memory_deregister API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when called before the runtime was initialized.");
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_memory_free.c b/src/core/api/test_hsa_memory_free.c
new file mode 100644
index 0000000..b897f2f
--- /dev/null
+++ b/src/core/api/test_hsa_memory_free.c
@@ -0,0 +1,143 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include "test_helper_func.h" + +/** + * + * Test Name: hsa_memory_free + * + * Purpose: + * Verify that if the API works as expected + * + * Description: + * + * 1) Allocate a block of memory using hsa_memory_allocate and then call hsa_memory_free. + * Check if the return value is HSA_STATUS_SUCCESS. 
+ *
+ * 2) Before init HsaRt, call hsa_memory_free, and check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED
+ *
+ */
+
+/**
+ * @Brief:
+ * Implement Description #1 (allocate from the first region of the first agent, then free)
+ *
+ * @Return:
+ * int - 0 when the end of the test body is reached
+ */
+
+int test_hsa_memory_free() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Work with the first agent
+    ASSERT(0 < agent_list.num_agents);
+    hsa_agent_t agent = agent_list.agents[0];
+
+    // Get the total number of regions for the agent; at least one is needed below
+    int num_regions = 0;
+    status = hsa_agent_iterate_regions(agent, callback_get_num_regions, &num_regions);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT(0 < num_regions);
+
+    hsa_region_t region_list[num_regions];
+    hsa_region_t *ptr_reg = region_list;
+    status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_reg);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_region_t region = region_list[0];
+
+    // Query the region's maximum allocation size; only the call's status is checked
+    size_t size_r = 0;
+    status = hsa_region_get_info(region, HSA_REGION_INFO_ALLOC_MAX_SIZE, (void *)&size_r);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    int size = 1024;
+    void *addr = 0;
+
+    // Allocating a block of memory
+    status = hsa_memory_allocate(region, size, &addr);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Deallocating the memory
+    status = hsa_memory_free(addr);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The hsa_memory_free API failed to free memory correctly.\n");
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #2 (hsa_init is never called in this test)
+*
+* @Return:
+* int - 0 when the end of the test body is reached
+*
+*/
+
+int test_hsa_memory_free_not_initialized() {
+    hsa_status_t status;
+    void *addr = 0;
+
+    status = hsa_memory_free(addr);
+    ASSERT_MSG(status == HSA_STATUS_ERROR_NOT_INITIALIZED, "The hsa_memory_free API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when called before the runtime was initialized.");
+
+    return 0;
+}
diff --git
a/src/core/api/test_hsa_memory_register.c b/src/core/api/test_hsa_memory_register.c new file mode 100644 index 0000000..a9ca34a --- /dev/null +++ b/src/core/api/test_hsa_memory_register.c @@ -0,0 +1,160 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/**
+ *
+ * Test Name: hsa_memory_register
+ *
+ * Purpose:
+ * Verify that the API works as expected
+ *
+ * Description:
+ *
+ * 1) After initializing the HSA runtime, get a block of memory using an OS allocator (malloc) and
+ * call hsa_memory_register; check that the return value is HSA_STATUS_SUCCESS.
+ *
+ * 2) Before initializing the HSA runtime, call hsa_memory_register, and check that the return
+ * value is HSA_STATUS_ERROR_NOT_INITIALIZED.
+ *
+ * 3) Check that the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT when
+ * passing a size of 0 with a non-NULL address.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1 (register and deregister a malloc'ed buffer)
+ *
+ * @Return:
+ * int - 0 when the end of the test body is reached
+ *
+ */
+
+int test_hsa_memory_register() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    int size = 1024;
+    void *addr = 0;
+    // The buffer comes from malloc, so the allocation itself is checked before it is registered.
+    addr = (void*) malloc(sizeof(char) * size);
+    ASSERT(addr != NULL);
+
+    status = hsa_memory_register(addr, size);
+    ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The hsa_memory_register API failed to register memory correctly.\n");
+
+    status = hsa_memory_deregister(addr, size);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free(addr);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #2 (hsa_init is never called in this test)
+*
+* @Return:
+* int - 0 when the end of the test body is reached
+*
+*/
+
+int test_hsa_memory_register_not_initialized() {
+    hsa_status_t status;
+    void *addr = NULL;
+
+    status = hsa_memory_register(addr, 0);
+    ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_memory_register API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when called before initializing the runtime.");
+
+
return 0;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #3 (non-NULL address registered with a size of 0)
+*
+* @Return:
+* int - 0 when the end of the test body is reached
+*
+*/
+
+int test_hsa_memory_register_invalid_argument() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    int size = 1024;
+    void *addr;
+
+    // Get a real block of memory so that only the size argument is invalid
+    addr = (void*) malloc(sizeof(char) * size);
+    ASSERT(addr != NULL);
+
+    status = hsa_memory_register(addr, 0);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_memory_register API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when passed a size of 0.");
+
+    free(addr);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_queue_add_write_index_acq_rel.c b/src/core/api/test_hsa_queue_add_write_index_acq_rel.c
new file mode 100644
index 0000000..d93c8d2
--- /dev/null
+++ b/src/core/api/test_hsa_queue_add_write_index_acq_rel.c
@@ -0,0 +1,125 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_add_write_index_acq_rel + * + * Purpose: Verify that API of hsa_queue_add_write_index_acq_rel() works as + * expected. + * + * Description: + * + * 1) Iteratively add an offset (from 0 to a pre-defined number) the write index of + * a queue using hsa_queue_add_write_index_acq_rel(). + * 2) Verify the returned value from the api is the previous write index. 
+ * 3) Verify the new write index is correctly updated on the queue with the
+ * load write index api.
+ */
+
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_hsa_queue_add_write_index_acq_rel() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Check if the agent supports kernel dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            // Don't create a queue for test on an agent that does not
+            // support DISPATCH.
+            continue;
+        }
+
+        // Create a queue
+        hsa_queue_t* queue;
+        const uint32_t queue_size = 4;
+        status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Verify the write index has been initialized to 0
+        uint64_t write_index = hsa_queue_load_write_index_acquire(queue);
+        ASSERT(0 == write_index);
+
+        const uint64_t repeats = 64;
+        uint64_t jj;
+        uint64_t write_index_prev = 0;
+        for (jj = 0; jj < repeats; ++jj) {
+            // Add jj to the write index; the call returns the value it saw before the add
+            uint64_t write_index_returned = hsa_queue_add_write_index_acq_rel(queue, jj);
+
+            // Verify the returned write index is the same as the previous write index
+            ASSERT_MSG(write_index_returned == write_index_prev, "The hsa_queue_add_write_index_acq_rel API did not properly return the previous write index value.");
+
+            // Verify the new write index has been correctly updated on the queue
+            ASSERT_MSG(hsa_queue_load_write_index_relaxed(queue) == write_index_prev + jj, "The hsa_queue_add_write_index_acq_rel API did not properly update the write index value.");
+
+            // Update the "previous" write index
+            write_index_prev += jj;
+        }
+
+        // Destroy the queue
+
status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+    // Every agent has been visited: shut the runtime down, then release the agent list.
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_queue_add_write_index_acquire.c b/src/core/api/test_hsa_queue_add_write_index_acquire.c
new file mode 100644
index 0000000..7764874
--- /dev/null
+++ b/src/core/api/test_hsa_queue_add_write_index_acquire.c
@@ -0,0 +1,125 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/**
+ *
+ * Test Name: hsa_queue_add_write_index_acquire
+ *
+ * Purpose: Verify that the hsa_queue_add_write_index_acquire() API works as
+ * expected.
+ *
+ * Description:
+ *
+ * 1) Iteratively add an offset (from 0 to a pre-defined number) to the write index of
+ * a queue using hsa_queue_add_write_index_acquire().
+ * 2) Verify the returned value from the api is the previous write index.
+ * 3) Verify the new write index is correctly updated on the queue with the
+ * load write index api.
+ */
+
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_hsa_queue_add_write_index_acquire() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Check if the agent supports kernel dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            // Don't create a queue for test on an agent that does not
+            // support DISPATCH.
+            continue;
+        }
+
+        // Create a queue
+        hsa_queue_t* queue;
+        const uint32_t queue_size = 4;
+        status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Verify the write index has been initialized to 0
+        uint64_t write_index = hsa_queue_load_write_index_acquire(queue);
+        ASSERT(0 == write_index);
+
+        const uint64_t repeats = 64;
+        uint64_t jj;
+        uint64_t write_index_prev = 0;
+        for (jj = 0; jj < repeats; ++jj) {
+            // Add jj to the write index; the call returns the value it saw before the add
+            uint64_t write_index_returned = hsa_queue_add_write_index_acquire(queue, jj);
+
+            // Verify the returned write index is the same as the previous write index
+            ASSERT_MSG(write_index_returned == write_index_prev, "The hsa_queue_add_write_index_acquire API did not properly return the previous write index value.\n");
+
+            // Verify the new write index has been correctly updated on the queue
+            ASSERT_MSG(hsa_queue_load_write_index_relaxed(queue) == write_index_prev + jj, "The hsa_queue_add_write_index_acquire API did not properly set the write index value.\n");
+
+            // Update the "previous" write index
+            write_index_prev += jj;
+        }
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_queue_add_write_index_relaxed.c b/src/core/api/test_hsa_queue_add_write_index_relaxed.c
new file mode 100644
index 0000000..6787b1e
--- /dev/null
+++ b/src/core/api/test_hsa_queue_add_write_index_relaxed.c
@@ -0,0 +1,125 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced
Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_add_write_index_relaxed + * + * Purpose: Verify that API of hsa_queue_add_write_index_relaxed() works as + * expected. 
+ * + * Description: + * + * 1) Iteratively add an offset (from 0 to a pre-defined number) to the write index of + * a queue using hsa_queue_add_write_index_relaxed(). + * 2) Verify the returned value from the api is the previous write index. + * 3) Verify the new write index is correctly updated on the queue with the + * load write index api. + */ + +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_queue_add_write_index_relaxed() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Don't create a queue for test on an agent that does not + // support DISPATCH. 
+ continue; + } + + // Create a small single-producer queue on this agent + hsa_queue_t* queue; + const uint32_t queue_size = 4; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the write index has been initialized to 0 + uint64_t write_index = hsa_queue_load_write_index_acquire(queue); + ASSERT(0 == write_index); + + const uint64_t repeats = 64; + uint64_t jj; + uint64_t write_index_prev = 0; + for (jj = 0; jj < repeats; ++jj) { + // Add jj to the write index; the value returned is checked against the value held before the add + uint64_t write_index_returned = hsa_queue_add_write_index_relaxed(queue, jj); + + // Verify the returned write index is the same as the previous write index + ASSERT_MSG(write_index_returned == write_index_prev, "The hsa_queue_add_write_index_relaxed API failed to properly return the previous value of the write index.\n"); + + // Verify the new write index has been correctly updated on the queue + ASSERT_MSG(hsa_queue_load_write_index_relaxed(queue) == write_index_prev + jj, "The hsa_queue_add_write_index_relaxed API failed to properly set the write index.\n"); + + // Update the "previous" write index + write_index_prev += jj; + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/api/test_hsa_queue_add_write_index_release.c b/src/core/api/test_hsa_queue_add_write_index_release.c new file mode 100644 index 0000000..9231441 --- /dev/null +++ b/src/core/api/test_hsa_queue_add_write_index_release.c @@ -0,0 +1,125 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, 
Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_add_write_index_release + * + * Purpose: Verify that API of hsa_queue_add_write_index_release() works as + * expected. 
+ * + * Description: + * + * 1) Iteratively add an offset (from 0 to a pre-defined number) to the write index of + * a queue using hsa_queue_add_write_index_release(). + * 2) Verify the returned value from the api is the previous write index. + * 3) Verify the new write index is correctly updated on the queue with the + * load write index api. + */ + +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_queue_add_write_index_release() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Don't create a queue for test on an agent that does not + // support DISPATCH. 
+ continue; + } + + // Create a small single-producer queue on this agent + hsa_queue_t* queue; + const uint32_t queue_size = 4; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the write index has been initialized to 0 + uint64_t write_index = hsa_queue_load_write_index_acquire(queue); + ASSERT(0 == write_index); + + const uint64_t repeats = 64; + uint64_t jj; + uint64_t write_index_prev = 0; + for (jj = 0; jj < repeats; ++jj) { + // Add jj to the write index; the value returned is checked against the value held before the add + uint64_t write_index_returned = hsa_queue_add_write_index_release(queue, jj); + + // Verify the returned write index is the same as the previous write index + ASSERT_MSG(write_index_returned == write_index_prev, "The hsa_queue_add_write_index_release API failed to properly return the previous write index.\n"); + + // Verify the new write index has been correctly updated on the queue + ASSERT_MSG(hsa_queue_load_write_index_relaxed(queue) == write_index_prev + jj, "The hsa_queue_add_write_index_release API failed to properly set the value of the write index.\n"); + + // Update the "previous" write index + write_index_prev += jj; + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/api/test_hsa_queue_cas_write_index_acq_rel.c b/src/core/api/test_hsa_queue_cas_write_index_acq_rel.c new file mode 100644 index 0000000..0b2673e --- /dev/null +++ b/src/core/api/test_hsa_queue_cas_write_index_acq_rel.c @@ -0,0 +1,125 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, 
Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_cas_write_index_acq_rel + * + * Purpose: Verify that API of hsa_queue_cas_write_index_acq_rel() works as + * expected. 
+ * + * Description: + * + * 1) Iteratively set the write index of a queue from 0 to a pre-defined + * number using hsa_queue_cas_write_index_acq_rel(). + * 2) Verify the returned value from the api is the previous write index. + * 3) Verify the new write index is correctly updated on the queue with the + * load write index api. + */ + +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_queue_cas_write_index_acq_rel() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Don't create a queue for test on an agent that does not + // support DISPATCH. 
+ continue; + } + + // Create a small single-producer queue on this agent + hsa_queue_t* queue; + const uint32_t queue_size = 4; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the write index has been initialized to 0 + uint64_t write_index = hsa_queue_load_write_index_acquire(queue); + ASSERT(0 == write_index); + + const uint64_t repeats = 64; + uint64_t jj; + uint64_t write_index_prev = 0; + for (jj = 0; jj < repeats; ++jj) { + // CAS the write index: expected value is write_index_prev, new value is jj + uint64_t write_index_returned = hsa_queue_cas_write_index_acq_rel(queue, write_index_prev, jj); + + // Verify the returned write index is the same as the previous write index + ASSERT_MSG(write_index_returned == write_index_prev, "The hsa_queue_cas_write_index_acq_rel API failed to properly return the previous value of the write index.\n"); + + // Verify the new write index has been correctly updated on the queue + ASSERT_MSG(hsa_queue_load_write_index_relaxed(queue) == jj, "The hsa_queue_cas_write_index_acq_rel API failed to properly set the value of the write index.\n"); + + // Update the "previous" write index + write_index_prev = jj; + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/api/test_hsa_queue_cas_write_index_acquire.c b/src/core/api/test_hsa_queue_cas_write_index_acquire.c new file mode 100644 index 0000000..5415cec --- /dev/null +++ b/src/core/api/test_hsa_queue_cas_write_index_acquire.c @@ -0,0 +1,126 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 
2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_cas_write_index_acquire + + * + * Purpose: Verify that API of hsa_queue_cas_write_index_acquire() works as + * expected. 
+ * + * Description: + * + * 1) Iteratively set the write index of a queue from 0 to a pre-defined + * number using hsa_queue_cas_write_index_acquire(). + * 2) Verify the returned value from the api is the previous write index. + * 3) Verify the new write index is correctly updated on the queue with the + * load write index api. + */ + +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_queue_cas_write_index_acquire() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Don't create a queue for test on an agent that does not + // support DISPATCH. 
+ continue; + } + + // Create a small single-producer queue on this agent + hsa_queue_t* queue; + const uint32_t queue_size = 4; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the write index has been initialized to 0 + uint64_t write_index = hsa_queue_load_write_index_acquire(queue); + ASSERT(0 == write_index); + + const uint64_t repeats = 64; + uint64_t jj; + uint64_t write_index_prev = 0; + for (jj = 0; jj < repeats; ++jj) { + // CAS the write index: expected value is write_index_prev, new value is jj + uint64_t write_index_returned = hsa_queue_cas_write_index_acquire(queue, write_index_prev, jj); + + // Verify the returned write index is the same as the previous write index + ASSERT_MSG(write_index_returned == write_index_prev, "The hsa_queue_cas_write_index_acquire API failed to properly return the previous value of the write index.\n"); + + // Verify the new write index has been correctly updated on the queue + ASSERT_MSG(hsa_queue_load_write_index_relaxed(queue) == jj, "The hsa_queue_cas_write_index_acquire API failed to properly set the write index value.\n"); + + // Update the "previous" write index + write_index_prev = jj; + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/api/test_hsa_queue_cas_write_index_relaxed.c b/src/core/api/test_hsa_queue_cas_write_index_relaxed.c new file mode 100644 index 0000000..660f4e9 --- /dev/null +++ b/src/core/api/test_hsa_queue_cas_write_index_relaxed.c @@ -0,0 +1,126 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + 
* The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, 
Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_cas_write_index_relaxed + + * + * Purpose: Verify that API of hsa_queue_cas_write_index_relaxed() works as + * expected. 
+ * + * Description: + * + * 1) Iteratively set the write index of a queue from 0 to a pre-defined + * number using hsa_queue_cas_write_index_relaxed(). + * 2) Verify the returned value from the api is the previous write index. + * 3) Verify the new write index is correctly updated on the queue with the + * load write index api. + */ + +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_queue_cas_write_index_relaxed() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Don't create a queue for test on an agent that does not + // support DISPATCH. 
+ continue; + } + + // Create a small single-producer queue on this agent + hsa_queue_t* queue; + const uint32_t queue_size = 4; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the write index has been initialized to 0 + uint64_t write_index = hsa_queue_load_write_index_acquire(queue); + ASSERT(0 == write_index); + + const uint64_t repeats = 64; + uint64_t jj; + uint64_t write_index_prev = 0; + for (jj = 0; jj < repeats; ++jj) { + // CAS the write index: expected value is write_index_prev, new value is jj + uint64_t write_index_returned = hsa_queue_cas_write_index_relaxed(queue, write_index_prev, jj); + + // Verify the returned write index is the same as the previous write index + ASSERT_MSG(write_index_returned == write_index_prev, "The hsa_queue_cas_write_index_relaxed API failed to properly return the previous value of the write index.\n"); + + // Verify the new write index has been correctly updated on the queue + ASSERT_MSG(hsa_queue_load_write_index_relaxed(queue) == jj, "The hsa_queue_cas_write_index_relaxed API failed to properly set the value of the write index.\n"); + + // Update the "previous" write index + write_index_prev = jj; + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/api/test_hsa_queue_cas_write_index_release.c b/src/core/api/test_hsa_queue_cas_write_index_release.c new file mode 100644 index 0000000..a8f799c --- /dev/null +++ b/src/core/api/test_hsa_queue_cas_write_index_release.c @@ -0,0 +1,126 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 
2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_cas_write_index_release + + * + * Purpose: Verify that API of hsa_queue_cas_write_index_release() works as + * expected. 
+ * + * Description: + * + * 1) Iteratively set the write index of a queue from 0 to a pre-defined + * number using hsa_queue_cas_write_index_release(). + * 2) Verify the returned value from the api is the previous write index. + * 3) Verify the new write index is correctly updated on the queue with the + * load write index api. + */ + +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_queue_cas_write_index_release() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Don't create a queue for test on an agent that does not + // support DISPATCH. 
+ continue; + } + + // Create a small single-producer queue on this agent + hsa_queue_t* queue; + const uint32_t queue_size = 4; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the write index has been initialized to 0 + uint64_t write_index = hsa_queue_load_write_index_acquire(queue); + ASSERT(0 == write_index); + + const uint64_t repeats = 64; + uint64_t jj; + uint64_t write_index_prev = 0; + for (jj = 0; jj < repeats; ++jj) { + // CAS the write index: expected value is write_index_prev, new value is jj + uint64_t write_index_returned = hsa_queue_cas_write_index_release(queue, write_index_prev, jj); + + // Verify the returned write index is the same as the previous write index + ASSERT(write_index_returned == write_index_prev); + + // Verify the new write index has been correctly updated on the queue + ASSERT(hsa_queue_load_write_index_relaxed(queue) == jj); + + // Update the "previous" write index + write_index_prev = jj; + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/api/test_hsa_queue_create.c b/src/core/api/test_hsa_queue_create.c new file mode 100644 index 0000000..64937f5 --- /dev/null +++ b/src/core/api/test_hsa_queue_create.c @@ -0,0 +1,404 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_create + * + * Purpose: Verify that API of hsa_queue_create() works as expected + * + * Description: + * + * 1) Create a queue with valid parameters. + * Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Init the runtime, generate the agent list, shutdown the runtime, + * then create queue. + * Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. 
+ * + * 3) Create queues until the system is running out of resources. + * Check if the return value is HSA_STATUS_ERROR_OUT_OF_RESOURCES. + * + * 4) Create a queue on an agent that does NOT support dispatch. + * Create a queue on an invalid (not_initialized) agent. + * Check if the return value is HSA_STATUS_ERROR_INVALID_AGENT. + * + * 5) Create a queue of type MULTI on an agent that only supports + * SINGLE. Check if the return value is + * HSA_STATUS_ERROR_INVALID_QUEUE_CREATION. + * + * 6) Create queues with invalid arguments: queue size not a power + * of 2, an invalid queue "type", queue pointer being NULL. + * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + */ + +#include +#include +#include +#include "test_helper_func.h" +#include + +/** + * + * @Brief: + * Implement Description #1 + * + * @Return: + * int + * + */ + +int test_hsa_queue_create() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // The agent must support at least one queue + uint32_t queues_max = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queues_max); + ASSERT(status == HSA_STATUS_SUCCESS); + if (queues_max < 1) { + continue; + } + + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Skip agents that do not support kernel dispatch. 
+ continue; + } + + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Attempting to create a queue with the hsa_queue_create API failed.\n"); + + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return: + * int + * + */ + +int test_hsa_queue_create_not_initialized() { + int ii; + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Copy the dispatch agent handles by value: they are used after the agent list is freed and the runtime is shut down + hsa_agent_t* dispatch_agents = (hsa_agent_t*)malloc(sizeof(hsa_agent_t) * agent_list.num_agents); + int num_dispatch_agents = 0; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 != (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Keep a copy of the handle rather than a pointer into agent_list.agents + dispatch_agents[num_dispatch_agents] = agent_list.agents[ii]; + ++num_dispatch_agents; + } + } + + free_agent_list(&agent_list); + + // Shut down the runtime + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + for (ii = 0; ii < num_dispatch_agents; ++ii) { + hsa_queue_t* queue; + status = hsa_queue_create(dispatch_agents[ii], 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + if (HSA_STATUS_ERROR_INVALID_AGENT == status) { + ASSERT_MSG(0, "The hsa_queue_create API returned HSA_STATUS_ERROR_INVALID_AGENT instead of 
HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized.\n"); + } else if 
(HSA_STATUS_ERROR_NOT_INITIALIZED == status) { + // This indicates proper behavior + } else { + ASSERT_MSG(0, "The hsa_queue_create API returned an error besides HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized.\n"); + } + } + + free(dispatch_agents); + return 0; +} + +/** + * + * @Brief: + * Implement Description #3 + * + * @Return: + * int + * + */ + +int test_hsa_queue_create_out_of_resources() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Get max number of queues + uint32_t queues_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queues_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (queues_max < 1) { + // This agent does not support any queue + continue; + } + + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Skip agents that do not support kernel dispatch. 
+ continue; + } + + // Create up to queues_max queues; stopping early with OUT_OF_RESOURCES is accepted + const uint32_t queue_size = 128; + hsa_queue_t** queues = (hsa_queue_t**)malloc(queues_max * sizeof(hsa_queue_t*)); + int num_queues = 0; + while (num_queues < queues_max) { + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, queues + num_queues); + if (HSA_STATUS_ERROR_OUT_OF_RESOURCES == status) { + break; + } + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The hsa_queue_create API didn't return HSA_STATUS_ERROR_OUT_OF_RESOURCES when more than the maximum number of queues were created."); + ++num_queues; + } + + + // Destroy only the queues that were successfully created + int jj; + for (jj = 0; jj < num_queues; ++jj) { + status = hsa_queue_destroy(queues[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(queues); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #4 + * + * @Return: + * int + * + */ + +int test_hsa_queue_create_invalid_agent() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a queue with an invalid agent. 
+ hsa_queue_t* queue; + hsa_agent_t agent; + agent.handle = 0; + status = hsa_queue_create(agent, 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_ERROR_INVALID_AGENT == status); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #5 + * + * @Return: + * int + * + */ + +int test_hsa_queue_create_invalid_queue_creation() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // The agent must support at least one queue + uint32_t queues_max = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queues_max); + ASSERT(status == HSA_STATUS_SUCCESS); + if (queues_max < 1) { + continue; + } + + // Find an agent that only supports one producer + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_TYPE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if ((hsa_queue_type_t)features == HSA_QUEUE_TYPE_SINGLE) { + // Create a queue of type MULTI. 
+ hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 4, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + + // Expect result of INVALID_QUEUE_CREATION + ASSERT(HSA_STATUS_ERROR_INVALID_QUEUE_CREATION == status); + } + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #6 + * + * @Return: + * int + * + */ + +int test_hsa_queue_create_invalid_argument() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + hsa_queue_t* queue; + char* err_string; + + // The agent must support at least one queue + uint32_t queues_max = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queues_max); + ASSERT(status == HSA_STATUS_SUCCESS); + if (queues_max < 1) { + continue; + } + + // Create a queue with a size that is not a power of 2 + status = hsa_queue_create(agent_list.agents[ii], 5, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + if (HSA_STATUS_ERROR == status) { + ASSERT_MSG(1, "Queue create with size not a power of 2: ERROR_INVALID_ARGUMENT expected, ERROR received.\n"); + } else if (HSA_STATUS_ERROR_INVALID_ARGUMENT == status) { + // This indicates proper behavior + } else { + ASSERT(0); + } + + // Create a queue with an invalid queue type + status = hsa_queue_create(agent_list.agents[ii], 4, 3, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + if (HSA_STATUS_SUCCESS == status) { + ASSERT_MSG(0, "Queue created with \"type\" not a valid type: HSA_STATUS_ERROR_INVALID_ARGUMENT expected, HSA_STATUS_SUCCESS received.\n"); + } else if (HSA_STATUS_ERROR_INVALID_ARGUMENT == status) { + // This indicates proper behavior + } else { + ASSERT(0); + } + + // Create a queue with NULL pointer to the queue + status = 
hsa_queue_create(agent_list.agents[ii], 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, NULL); + ASSERT(HSA_STATUS_ERROR_INVALID_ARGUMENT == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/api/test_hsa_queue_destroy.c b/src/core/api/test_hsa_queue_destroy.c new file mode 100644 index 0000000..32acdd4 --- /dev/null +++ b/src/core/api/test_hsa_queue_destroy.c @@ -0,0 +1,248 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_destroy + * + * Purpose: Verify that API of hsa_queue_destroy() works as expected + * + * Description: + * + * 1) Destroy a queue with valid argument. + * Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Destroy a queue after the runtime has been shutdown. + * Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * 3) Destroy a queue by a queue pointer that has already been destroyed. + * Check if the return value is HSA_STATUS_ERROR_INVALID_QUEUE. + * + * 4) Destroy a queue through NULL queue pointer + * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. 
+ *
+ */
+
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1: on each agent that reports at least one
+ * queue, create a queue and verify that destroying it succeeds.
+ *
+ * @Return:
+ * int
+ *
+ */
+int test_hsa_queue_destroy() {
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s discovered;
+    get_agent_list(&discovered);
+
+    int agent_idx;
+    for (agent_idx = 0; agent_idx < discovered.num_agents; ++agent_idx) {
+        // Agents that cannot host any queue are not exercised
+        uint32_t max_queues = 0;
+        status = hsa_agent_get_info(discovered.agents[agent_idx], HSA_AGENT_INFO_QUEUES_MAX, &max_queues);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+        if (0 == max_queues) {
+            continue;
+        }
+
+        // Set up the queue that is about to be destroyed
+        hsa_queue_t* victim;
+        status = hsa_queue_create(discovered.agents[agent_idx], 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &victim);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // The call under test
+        status = hsa_queue_destroy(victim);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The hsa_queue_destroy API failed to destroy the queue.");
+    }
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    free_agent_list(&discovered);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #2: create a queue on the first agent that
+ * reports at least one queue, shut the runtime down, and verify that
+ * destroying the queue afterwards reports
+ * HSA_STATUS_ERROR_NOT_INITIALIZED.
+ *
+ * @Return:
+ * int
+ *
+ */
+int test_hsa_queue_destroy_not_initialized() {
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s discovered;
+    get_agent_list(&discovered);
+
+    // A zero handle means that no suitable agent has been found
+    hsa_agent_t target;
+    target.handle = 0;
+
+    int agent_idx;
+    for (agent_idx = 0; agent_idx < discovered.num_agents; ++agent_idx) {
+        uint32_t max_queues = 0;
+        status = hsa_agent_get_info(discovered.agents[agent_idx], HSA_AGENT_INFO_QUEUES_MAX, &max_queues);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+        if (max_queues >= 1) {
+            // Take the first agent that can host a queue
+            target = discovered.agents[agent_idx];
+            break;
+        }
+    }
+
+    if (0 == target.handle) {
+        // Nothing to test; still pair the hsa_init() above with a shutdown
+        status = hsa_shut_down();
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    } else {
+        // Create the queue while the runtime is still up
+        hsa_queue_t* orphan;
+        status = hsa_queue_create(target, 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &orphan);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Bring the runtime down before touching the queue again
+        status = hsa_shut_down();
+        ASSERT(status == HSA_STATUS_SUCCESS);
+
+        // Only HSA_STATUS_ERROR_NOT_INITIALIZED is acceptable here
+        status = hsa_queue_destroy(orphan);
+        if (HSA_STATUS_ERROR_NOT_INITIALIZED != status) {
+            if (HSA_STATUS_SUCCESS == status) {
+                ASSERT_MSG(0, "The hsa_queue_destroy API returned an unexpected status: HSA_STATUS_ERROR_NOT_INITIALIZED expected, HSA_STATUS_SUCCESS received.\n");
+            } else {
+                ASSERT_MSG(0, "The hsa_queue_destroy API returned an unexpected error.\n");
+            }
+        }
+    }
+
+    free_agent_list(&discovered);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #3: on each agent that reports at least one
+ * queue, create and destroy a queue, then destroy the same pointer a
+ * second time and expect HSA_STATUS_ERROR_INVALID_QUEUE.
+ *
+ * @Return:
+ * int
+ *
+ */
+int test_hsa_queue_destroy_invalid_queue() {
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s discovered;
+    get_agent_list(&discovered);
+
+    int agent_idx;
+    for (agent_idx = 0; agent_idx < discovered.num_agents; ++agent_idx) {
+        // Agents that cannot host any queue are not exercised
+        uint32_t max_queues = 0;
+        status = hsa_agent_get_info(discovered.agents[agent_idx], HSA_AGENT_INFO_QUEUES_MAX, &max_queues);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+        if (0 == max_queues) {
+            continue;
+        }
+
+        // Create a queue on this agent
+        hsa_queue_t* stale;
+        status = hsa_queue_create(discovered.agents[agent_idx], 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &stale);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // First destroy: expected to succeed
+        status = hsa_queue_destroy(stale);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Second destroy on the same pointer: expected to be rejected
+        status = hsa_queue_destroy(stale);
+        ASSERT_MSG(HSA_STATUS_ERROR_INVALID_QUEUE == status, "The hsa_destroy_queue API didn't return HSA_STATUS_ERROR_INVALID_QUEUE when called on an invalid queue.");
+    }
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    free_agent_list(&discovered);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4: pass NULL to hsa_queue_destroy() and expect
+ * HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ * @Return:
+ * int
+ *
+ */
+int test_hsa_queue_destroy_invalid_argument() {
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // The call under test
+    status = hsa_queue_destroy(NULL);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_destroy_queue API didn't return HSA_STATUS_ERROR_INVALID_ARGUMENT when called on a NULL queue.");
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_queue_inactivate.c b/src/core/api/test_hsa_queue_inactivate.c
new file mode 100644
index 0000000..ff71764
--- /dev/null
+++ b/src/core/api/test_hsa_queue_inactivate.c
@@ -0,0 +1,253 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_inactivate + * + * Purpose: Verify that API of hsa_queue_inactivate() works as expected + * + * Description: + * + * 1) Inactivate a valid, normal queue. + * Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Inactivate a queue after the runtime has been shutdown. The queue + * has NOT been destroyed before inactivating it. + * Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED.
+ *
+ *   3) Inactivate a queue that has already been destroyed.
+ *      Check if the return value is HSA_STATUS_ERROR_INVALID_QUEUE.
+ *
+ *   4) Inactivate a queue through NULL queue pointer
+ *      Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ */
+
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1: on each agent that reports at least one
+ * queue, create a queue, verify that inactivating it succeeds, then
+ * destroy it.
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_queue_inactivate() {
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s discovered;
+    get_agent_list(&discovered);
+
+    int agent_idx;
+    for (agent_idx = 0; agent_idx < discovered.num_agents; ++agent_idx) {
+        // Agents that cannot host any queue are not exercised
+        uint32_t max_queues = 0;
+        status = hsa_agent_get_info(discovered.agents[agent_idx], HSA_AGENT_INFO_QUEUES_MAX, &max_queues);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+        if (0 == max_queues) {
+            continue;
+        }
+
+        // Set up a live queue
+        hsa_queue_t* live;
+        status = hsa_queue_create(discovered.agents[agent_idx], 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &live);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // The call under test
+        status = hsa_queue_inactivate(live);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The hsa_queue_inactivate API failed when called on a valid queue.");
+
+        // Clean up
+        status = hsa_queue_destroy(live);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    free_agent_list(&discovered);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #2: create a queue on the first agent that
+ * reports at least one queue, shut the runtime down, and verify that
+ * inactivating the queue afterwards reports
+ * HSA_STATUS_ERROR_NOT_INITIALIZED.
+ *
+ * @Return:
+ * int
+ *
+ */
+int test_hsa_queue_inactivate_not_initialized() {
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s discovered;
+    get_agent_list(&discovered);
+
+    // A zero handle means that no suitable agent has been found
+    hsa_agent_t target;
+    target.handle = 0;
+
+    int agent_idx;
+    for (agent_idx = 0; agent_idx < discovered.num_agents; ++agent_idx) {
+        uint32_t max_queues = 0;
+        status = hsa_agent_get_info(discovered.agents[agent_idx], HSA_AGENT_INFO_QUEUES_MAX, &max_queues);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+        if (max_queues >= 1) {
+            // Take the first agent that can host a queue
+            target = discovered.agents[agent_idx];
+            break;
+        }
+    }
+
+    if (0 == target.handle) {
+        // Nothing to test; still pair the hsa_init() above with a shutdown
+        status = hsa_shut_down();
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    } else {
+        // Create the queue while the runtime is still up
+        hsa_queue_t* orphan;
+        status = hsa_queue_create(target, 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &orphan);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Bring the runtime down before touching the queue again
+        status = hsa_shut_down();
+        ASSERT(status == HSA_STATUS_SUCCESS);
+
+        // Only HSA_STATUS_ERROR_NOT_INITIALIZED is acceptable here
+        status = hsa_queue_inactivate(orphan);
+        if (HSA_STATUS_ERROR_NOT_INITIALIZED != status) {
+            if (HSA_STATUS_SUCCESS == status) {
+                ASSERT_MSG(0, "The hsa_queue_inactivate did not return HSA_STATUS_ERROR_NOT_INITIALIZED when called with a runtime that isn't initialized. HSA_STATUS_SUCCESS received instead.\n");
+            } else {
+                ASSERT_MSG(0, "The hsa_queue_inactivate did not return HSA_STATUS_ERROR_NOT_INITIALIZED when called with a runtime that isn't initialized.\n");
+            }
+        }
+    }
+
+    free_agent_list(&discovered);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #3: on each agent that reports at least one
+ * queue, create and destroy a queue, then inactivate the destroyed
+ * queue and expect HSA_STATUS_ERROR_INVALID_QUEUE.
+ *
+ * @Return:
+ * int
+ *
+ */
+int test_hsa_queue_inactivate_invalid_queue() {
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct agent_list_s discovered;
+    get_agent_list(&discovered);
+
+    int agent_idx;
+    for (agent_idx = 0; agent_idx < discovered.num_agents; ++agent_idx) {
+        // Agents that cannot host any queue are not exercised
+        uint32_t max_queues = 0;
+        status = hsa_agent_get_info(discovered.agents[agent_idx], HSA_AGENT_INFO_QUEUES_MAX, &max_queues);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+        if (0 == max_queues) {
+            continue;
+        }
+
+        // Create a queue on this agent
+        hsa_queue_t* stale;
+        status = hsa_queue_create(discovered.agents[agent_idx], 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &stale);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy it so that the pointer no longer names a valid queue
+        status = hsa_queue_destroy(stale);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Inactivating the destroyed queue is expected to be rejected
+        status = hsa_queue_inactivate(stale);
+        ASSERT_MSG(HSA_STATUS_ERROR_INVALID_QUEUE == status, "The hsa_queue_inactivate API did not return HSA_STATUS_INVALID_QUEUE when called with an invalid queue.");
+    }
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    free_agent_list(&discovered);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4: pass NULL to hsa_queue_inactivate(). Either
+ * HSA_STATUS_ERROR_INVALID_QUEUE or HSA_STATUS_ERROR_INVALID_ARGUMENT is
+ * accepted.
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_queue_inactivate_invalid_argument() {
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // The call under test
+    status = hsa_queue_inactivate(NULL);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_QUEUE == status || HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_queue_inactivate API did not return an expected value when called on a NULL queue.\n");
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_queue_load_read_index_acquire.c b/src/core/api/test_hsa_queue_load_read_index_acquire.c
new file mode 100644
index 0000000..fbb8de8
--- /dev/null
+++ b/src/core/api/test_hsa_queue_load_read_index_acquire.c
@@ -0,0 +1,131 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_load_read_index_acquire + * + * Purpose: Verify that API of hsa_queue_load_read_index_acquire() works as expected + * + * Description: + * + * 1) Load the read index from a valid, normal queue. + * a) Check if the read_index is 0 once the queue is created. + * b) Launch a few kernels, check if the read_index is updated + * correctly. 
+ *      c) Repeat b) several times, and check the read_index is correct
+ *         when it is greater than a set multiple of the queue_size.
+ *
+ */
+
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1: on each dispatch-capable agent, create a
+ * queue of size 4, check that the acquire-load of the read index starts
+ * at 0, then launch batches of 1, 2 and 3 no-op kernels and check after
+ * each batch that the read index advanced by the batch size.
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_queue_load_read_index_acquire() {
+    hsa_status_t status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    struct agent_list_s discovered;
+    get_agent_list(&discovered);
+
+    int agent_idx;
+    for (agent_idx = 0; agent_idx < discovered.num_agents; ++agent_idx) {
+        // Only agents with the kernel-dispatch feature are exercised
+        uint32_t feature_bits = 0;
+        status = hsa_agent_get_info(discovered.agents[agent_idx], HSA_AGENT_INFO_FEATURE, &feature_bits);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (!(feature_bits & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Set up the queue under test
+        const uint32_t queue_size = 4;
+        hsa_queue_t* queue;
+        status = hsa_queue_create(discovered.agents[agent_idx], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // A fresh queue must report a read index of 0
+        uint64_t read_index = hsa_queue_load_read_index_acquire(queue);
+        ASSERT(0 == read_index);
+
+        // Each pass launches `batch` kernels and expects the read index
+        // to have moved forward by exactly that many packets
+        uint64_t consumed = 0;
+        int batch;
+        for (batch = 1; batch < queue_size; ++batch) {
+            launch_no_op_kernels(&discovered.agents[agent_idx], queue, batch);
+
+            read_index = hsa_queue_load_read_index_acquire(queue);
+            ASSERT_MSG(read_index == consumed + batch, "The read index was not updated correctly.\n");
+            consumed = read_index;
+        }
+
+        // Clean up
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    free_agent_list(&discovered);
+
+    return 0;
+}
diff --git
a/src/core/api/test_hsa_queue_load_read_index_relaxed.c b/src/core/api/test_hsa_queue_load_read_index_relaxed.c new file mode 100644 index 0000000..06932da --- /dev/null +++ b/src/core/api/test_hsa_queue_load_read_index_relaxed.c @@ -0,0 +1,131 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/**
+ *
+ * Test Name: hsa_queue_load_read_index_relaxed
+ *
+ * Purpose: Verify that API of hsa_queue_load_read_index_relaxed() works as expected
+ *
+ * Description:
+ *
+ *   1) Load the read index from a valid, normal queue.
+ *      a) Check if the read_index is 0 once the queue is created.
+ *      b) Launch a few kernels, check if the read_index is updated
+ *         correctly.
+ *      c) Repeat b) several times, and check the read_index is correct
+ *         when it is greater than a set multiple of the queue_size.
+ *
+ */
+
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1: on each dispatch-capable agent, create a
+ * queue of size 4, check that the relaxed load of the read index starts
+ * at 0, then launch batches of 1, 2 and 3 no-op kernels and check after
+ * each batch that the read index advanced by the batch size.
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_hsa_queue_load_read_index_relaxed() {
+    hsa_status_t status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    struct agent_list_s discovered;
+    get_agent_list(&discovered);
+
+    int agent_idx;
+    for (agent_idx = 0; agent_idx < discovered.num_agents; ++agent_idx) {
+        // Only agents with the kernel-dispatch feature are exercised
+        uint32_t feature_bits = 0;
+        status = hsa_agent_get_info(discovered.agents[agent_idx], HSA_AGENT_INFO_FEATURE, &feature_bits);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (!(feature_bits & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Set up the queue under test
+        const uint32_t queue_size = 4;
+        hsa_queue_t* queue;
+        status = hsa_queue_create(discovered.agents[agent_idx], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // A fresh queue must report a read index of 0
+        uint64_t read_index = hsa_queue_load_read_index_relaxed(queue);
+        ASSERT(0 == read_index);
+
+        // Each pass launches `batch` kernels and expects the read index
+        // to have moved forward by exactly that many packets
+        uint64_t consumed = 0;
+        int batch;
+        for (batch = 1; batch < queue_size; ++batch) {
+            launch_no_op_kernels(&discovered.agents[agent_idx], queue, batch);
+
+            read_index = hsa_queue_load_read_index_relaxed(queue);
+            ASSERT_MSG(read_index == consumed + batch, "The read index was not updated correctly.\n");
+            consumed = read_index;
+        }
+
+        // Clean up
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    free_agent_list(&discovered);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_queue_load_store_write_index_acquire_relaxed.c b/src/core/api/test_hsa_queue_load_store_write_index_acquire_relaxed.c
new file mode 100644
index 0000000..c88da55
--- /dev/null
+++ b/src/core/api/test_hsa_queue_load_store_write_index_acquire_relaxed.c
@@ -0,0 +1,121 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_load_store_write_index_acquire_relaxed + * + * Purpose: Verify that API of hsa_queue_load_write_index_acquire() and + * hsa_queue_store_write_index_relaxed() works as expected + * + * Description: + * + * 1) Load/Store the write index from a valid, normal queue. + * a) Check if the write_index is 0 once the queue is created. + * b) Store a new value to the write_index. 
+ * c) Load the write_index, and verify the value is the same that + * was stored in step b). + */ + +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_queue_load_store_write_index_acquire_relaxed() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Don't create a queue for test on an agent that does not + // support DISPATCH. + continue; + } + + // Create a queue + hsa_queue_t* queue; + const uint32_t queue_size = 4; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the write_index has been initialized to 0 + uint64_t write_index = hsa_queue_load_write_index_acquire(queue); + ASSERT(0 == write_index); + + int jj; + const int num_iterations = 16; + for (jj = 1; jj < num_iterations; ++jj) { + // Store jj as the new write_index (relaxed store) + hsa_queue_store_write_index_relaxed(queue, (uint64_t)jj); + + // Load the write_index back (acquire load) + write_index = hsa_queue_load_write_index_acquire(queue); + + // Verify the loaded write_index equals the value that was just stored + ASSERT_MSG((uint64_t)jj == write_index, "The write index was not updated correctly.\n"); + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/api/test_hsa_queue_load_store_write_index_relaxed_release.c b/src/core/api/test_hsa_queue_load_store_write_index_relaxed_release.c new
file mode 100644 index 0000000..e74f6bc --- /dev/null +++ b/src/core/api/test_hsa_queue_load_store_write_index_relaxed_release.c @@ -0,0 +1,121 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: hsa_queue_load_store_write_index_relaxed_release + * + * Purpose: Verify that the hsa_queue_load_write_index_relaxed() and + * hsa_queue_store_write_index_release() APIs work as expected + * + * Description: + * + * 1) Load/Store the write index from a valid, normal queue. + * a) Check if the write_index is 0 once the queue is created. + * b) Store a new value to the write_index. + * c) Load the write_index, and verify the value is the same that + * was stored in step b). + */ + +#include +#include +#include +#include "test_helper_func.h" + +int test_hsa_queue_load_store_write_index_relaxed_release() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agents; + get_agent_list(&agents); + + int ii; + for (ii = 0; ii < agents.num_agents; ++ii) { + // Check if the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agents.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Don't create a queue for test on an agent that does not + // support DISPATCH.
+ continue; + } + + // Create a queue + hsa_queue_t* queue; + const uint32_t queue_size = 4; + status = hsa_queue_create(agents.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the write_index has been initialized to 0 + uint64_t write_index = hsa_queue_load_write_index_relaxed(queue); + ASSERT(0 == write_index); + + int jj; + const int num_iterations = 16; + for (jj = 1; jj < num_iterations; ++jj) { + // Store jj as the new write_index (release store) + hsa_queue_store_write_index_release(queue, (uint64_t)jj); + + // Load the write_index back (relaxed load) + write_index = hsa_queue_load_write_index_relaxed(queue); + + // Verify the loaded write_index equals the value that was just stored + ASSERT_MSG((uint64_t)jj == write_index, "The write index was not updated correctly.\n"); + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agents); + + return 0; +} diff --git a/src/core/api/test_hsa_region_get_info.c b/src/core/api/test_hsa_region_get_info.c new file mode 100644 index 0000000..3e44654 --- /dev/null +++ b/src/core/api/test_hsa_region_get_info.c @@ -0,0 +1,249 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include "test_helper_func.h" + +/** + * + * Test Name: region_get_info + * + * Purpose: + * Verify that the API works as expected + * + * Description: + * + * 1) Iterate through all of the regions of an agent and get all region info. + * + * 2) Before the hsa runtime is initialized call hsa_region_get_info and + * check that the return value is HSA_STATUS_ERROR_NOT_INITIALIZED.
+ * + * 3) Call hsa_region_get_info with an invalid region handle. + * Check that the return value is HSA_STATUS_ERROR_INVALID_REGION. + * + * 4) Call hsa_region_get_info with a NULL value for the value parameter. + * Check that the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + */ + +/** + * + * @Brief: + * Implement Description #1 + * + * @Return: + * int: 0 when the end of the test is reached + * + */ + +int test_hsa_region_get_info() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Work with the first agent + hsa_agent_t agent = agent_list.agents[0]; + + // Get the total number of regions for the agent + int num_regions = 0; + status = hsa_agent_iterate_regions(agent, callback_get_num_regions, &num_regions); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_region_t region_list[num_regions]; + hsa_region_t *ptr_regions = region_list; + status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_regions); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get region list.\n"); + + // Query every attribute of each of the regions + int ii; + for (ii = 0; ii < num_regions; ++ii) { + hsa_region_t region = *(region_list+ii); + + size_t size; + status = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get region size.\n"); + + hsa_region_segment_t segment_info; + status = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_info); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get region segment info.\n"); + + if (HSA_REGION_SEGMENT_GLOBAL == segment_info) { + uint32_t flag_info = 0; + status = hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flag_info); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get region flag info.\n"); + } + + size_t max_size; + status = hsa_region_get_info(region, HSA_REGION_INFO_ALLOC_MAX_SIZE, &max_size); +
ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get region's maximum size.\n"); + ASSERT_MSG((HSA_REGION_SEGMENT_GLOBAL != segment_info && max_size == 0)||(HSA_REGION_SEGMENT_GLOBAL == segment_info), "The region's maximum allocation size is not correct.\n"); + + size_t granule_size; + status = hsa_region_get_info(region, HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE, &granule_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get region's allocation granularity.\n"); + if (max_size == 0) { + ASSERT_MSG(granule_size == 0, "The region's granule size is wrong.\n"); + } + + size_t alignment_size; + status = hsa_region_get_info(region, HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT, &alignment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get region's allocation alignment size!\n"); + if (max_size == 0) { + ASSERT_MSG(alignment_size == 0, "The region's allocation alignment size is wrong\n"); + } else { + ASSERT_MSG(alignment_size&&(!(alignment_size&(alignment_size-1))), "The region's alignment size is not power of 2.\n"); + } + } + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} + +/** +* +* @Brief: +* Implement Description #2 +* +* @Return: +* int: 0 when the end of the test is reached +* +*/ + +int test_hsa_region_get_info_not_initialized() { + hsa_status_t status; + size_t size; + + hsa_region_t invalid_region; + invalid_region.handle = 0; + + status = hsa_region_get_info(invalid_region, HSA_REGION_INFO_SIZE, &size); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_region_get_info API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when called before the runtime was initialized.\n"); + + return 0; +} + +/** +* +* @Brief: +* Implement Description #3 +* +* @Return: +* int: 0 when the end of the test is reached +* +*/ + +int test_hsa_region_get_info_invalid_region() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + size_t size; + hsa_region_t invalid_region; + invalid_region.handle = 0; + status =
hsa_region_get_info(invalid_region, HSA_REGION_INFO_SIZE, &size); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_REGION == status, "The hsa_region_get_info API failed to return HSA_STATUS_ERROR_INVALID_REGION when passed an invalid region.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** +* +* @Brief: +* Implement Description #4 +* +* @Return: +* int: 0 when the end of the test is reached +* +*/ + +int test_hsa_region_get_info_invalid_argument() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Work with the first agent + hsa_agent_t agent = agent_list.agents[0]; + + // Get the total number of regions for the agent + int num_regions = 0; + status = hsa_agent_iterate_regions(agent, callback_get_num_regions, &num_regions); + ASSERT(HSA_STATUS_SUCCESS == status); + // region_list[0] is read below, so the agent must report at least one region. + ASSERT_MSG(num_regions > 0, "The agent does not report any regions.\n"); + + // Variable-length array that holds the region list of the agent + hsa_region_t region_list[num_regions]; + hsa_region_t *ptr_regions = region_list; + status = hsa_agent_iterate_regions(agent, callback_get_regions, &ptr_regions); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_region_t region = region_list[0]; + + status = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, NULL); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_region_get_info API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when passed a NULL parameter.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/api/test_hsa_shut_down.c b/src/core/api/test_hsa_shut_down.c new file mode 100644 index 0000000..f76f339 --- /dev/null +++ b/src/core/api/test_hsa_shut_down.c @@ -0,0 +1,133 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of
Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_shut_down + * + * Purpose: Verify that the API works as expected. + * + * Description: + * 1) After opening one instance of HSA runtime, shut it down.
+ * Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Before opening an instance of HSA runtime, shut it down. + * Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * 3) Open and shut down an instance of HSA runtime, then shut it down again. + * Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. + * + */ + +/** + * + * @Brief: + * Implement description #1: hsa_shut_down() after a successful hsa_init() must return HSA_STATUS_SUCCESS. + * Note: shutdown is effectively already verified together with hsa_init(). + * TODO: decide whether this test case is redundant and should be deleted. + * + * @Return + * int: 0 when the end of the test is reached + * + */ + +int test_hsa_shut_down() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "The call to hsa_shut_down() failed.\n"); + + return 0; +} + +/** + * + * @Brief + * Implement description #2: hsa_shut_down() is called without a preceding hsa_init() in this test and must report HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * @Return + * int: 0 when the end of the test is reached + * + */ + +int test_hsa_shut_down_not_initialized() { + hsa_status_t status; + + status = hsa_shut_down(); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "Calling hsa_shut_down() didn't return HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized."); + + return 0; +} + +/** + * + * @Brief + * Implement description #3: the second of two hsa_shut_down() calls that follow a single hsa_init() must report HSA_STATUS_ERROR_NOT_INITIALIZED.
+ * + * @Return + * int: 0 when the end of the test is reached + * + */ + +int test_hsa_shut_down_after_shut_down() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "Calling hsa_shut_down() didn't return HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized."); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_add_acq_rel.c b/src/core/api/test_hsa_signal_add_acq_rel.c new file mode 100644 index 0000000..e648b7a --- /dev/null +++ b/src/core/api/test_hsa_signal_add_acq_rel.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_add_acq_rel. + * Purpose: + * + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_add_acq_rel API + * to modify the signal value. + * + */ + +/** + * + * @Brief: + * Implement description #1: add 100 to a signal created with value 100 and check that it then loads as 200 + * + * @Return: + * int: 0 when the end of the test is reached + * + */ + +int test_hsa_signal_add_acq_rel() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, added_value = 100, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_add_acq_rel(signal_handle, added_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value + added_value), "The hsa_signal_add_acq_rel API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_add_acquire.c b/src/core/api/test_hsa_signal_add_acquire.c new file mode 100644 index 0000000..09b4dd3 --- /dev/null +++
b/src/core/api/test_hsa_signal_add_acquire.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_add_acquire. + * Purpose: + * + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_add_acquire API to modify the + * signal value. + * + */ + +/** + * + * @Brief: + * Implement description #1: add 100 to a signal created with value 100 and check that it then loads as 200 + * + * @Return: + * int: 0 when the end of the test is reached + * + */ + +int test_hsa_signal_add_acquire() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, added_value = 100, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_add_acquire(signal_handle, added_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value + added_value), "The hsa_signal_add_acquire API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_add_relaxed.c b/src/core/api/test_hsa_signal_add_relaxed.c new file mode 100644 index 0000000..afedf9b --- /dev/null +++ b/src/core/api/test_hsa_signal_add_relaxed.c @@ -0,0 +1,98 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_add_relaxed. + * Purpose: + * + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_add_relaxed API to modify the + * signal's value.
+ * + */ + +/** + * + * @Brief: + * Implement description #1: add 100 to a signal created with value 100 and check that it then loads as 200 + * + * @Return: + * int: 0 when the end of the test is reached + * + */ + +int test_hsa_signal_add_relaxed() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, added_value = 100, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_add_relaxed(signal_handle, added_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + + ASSERT_MSG(loaded_value == (initial_value + added_value), "The hsa_signal_add_relaxed API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_add_release.c b/src/core/api/test_hsa_signal_add_release.c new file mode 100644 index 0000000..c1a31ee --- /dev/null +++ b/src/core/api/test_hsa_signal_add_release.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_add_release. + * Purpose: + * + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and then call hsa_signal_add_release to + * modify the signal value.
+ * + */ + +/** + * + * @Brief: + * Implement description #1: add 100 to a signal created with value 100 and check that it then loads as 200 + * + * @Return: + * int: 0 when the end of the test is reached + * + */ + +int test_hsa_signal_add_release() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, added_value = 100, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_add_release(signal_handle, added_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value + added_value), "The hsa_signal_add_release API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_and_acq_rel.c b/src/core/api/test_hsa_signal_and_acq_rel.c new file mode 100644 index 0000000..8594165 --- /dev/null +++ b/src/core/api/test_hsa_signal_and_acq_rel.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_and_acq_rel + * + * Purpose: + * Verify that if the API works as expected. + * + * Description: + * + * 1) Create a new signal object then call hsa_signal_and_acq_rel to modify the value. + * Load the new value of the signal and check if the new value is correct. 
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_and_acq_rel() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, and_value = 50, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_and_acq_rel(signal_handle, and_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value & and_value), "The hsa_signal_and_acq_rel API failed to modify the signal correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_and_acquire.c b/src/core/api/test_hsa_signal_and_acquire.c new file mode 100644 index 0000000..48639fd --- /dev/null +++ b/src/core/api/test_hsa_signal_and_acquire.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_and_acquire + * + * Purpose: + * Verify that if the API works as expected. + * + * Description: + * + * 1) Create a signal and call the hsa_signal_and_acquire to + * modify the signal value. 
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_and_acquire() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, and_value = 50, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_and_acquire(signal_handle, and_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value & and_value), "The hsa_signal_and_acquire API failed to modify the signals value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_and_relaxed.c b/src/core/api/test_hsa_signal_and_relaxed.c new file mode 100644 index 0000000..b3a424d --- /dev/null +++ b/src/core/api/test_hsa_signal_and_relaxed.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_and_relaxed + * + * Purpose: + * Verify that if the API works as expected. + * + * Description: + * + * 1) Create a new signal and call hsa_signal_and_relaxed to + * modify the signal value. 
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_and_relaxed() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, and_value = 50, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_and_relaxed(signal_handle, and_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value & and_value), "The hsa_signal_and_relaxed API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_and_release.c b/src/core/api/test_hsa_signal_and_release.c new file mode 100644 index 0000000..c0a36f7 --- /dev/null +++ b/src/core/api/test_hsa_signal_and_release.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_and_release + * + * Purpose: + * Verify that the API works as expected. + * + * Description: + * + * 1) Create a new signal object and then call the hsa_signal_and_release API + * to modify the value. Load the new value of the signal and check that the value is correct.
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_and_release() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, and_value = 50, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_and_release(signal_handle, and_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value & and_value), "The hsa_signal_and_release API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_cas_acq_rel.c b/src/core/api/test_hsa_signal_cas_acq_rel.c new file mode 100644 index 0000000..ad73ffc --- /dev/null +++ b/src/core/api/test_hsa_signal_cas_acq_rel.c @@ -0,0 +1,98 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_cas_acq_rel. + * + * Purpose: + * Verify that the API works as expected. + * + * Description: + * + * 1) Create a new signal object and call the hsa_signal_cas_acq_rel API to swap the value. + * Load the value of the signal to check that the new value is stored in the signal.
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_cas_acq_rel() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, expected_value = 100, new_value = 200, observed_value, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + observed_value = hsa_signal_cas_acq_rel(signal_handle, expected_value, new_value); + ASSERT_MSG(observed_value == initial_value, "The hsa_signal_cas_acq_rel API failed to return the initial value.\n"); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(new_value == loaded_value, "The hsa_signal_cas_acq_rel API failed to exchange the new value.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_cas_acquire.c b/src/core/api/test_hsa_signal_cas_acquire.c new file mode 100644 index 0000000..92a3b7a --- /dev/null +++ b/src/core/api/test_hsa_signal_cas_acquire.c @@ -0,0 +1,98 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_cas_acquire. + * + * Purpose: + * Verify that the API works as expected. + * + * Description: + * + * 1) Create a new signal object and switch the value with hsa_signal_cas_acquire. + * Load the value of the signal to check that the new value is stored in the signal.
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_cas_acquire() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, expected_value = 100, new_value = 200, observed_value, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + observed_value = hsa_signal_cas_acquire(signal_handle, expected_value, new_value); + ASSERT_MSG(observed_value == initial_value, "The hsa_signal_cas_acquire API failed to return the initial value.\n"); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(new_value == loaded_value, "The hsa_signal_cas_acquire API faile to exchange the new value.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_cas_relaxed.c b/src/core/api/test_hsa_signal_cas_relaxed.c new file mode 100644 index 0000000..afbe867 --- /dev/null +++ b/src/core/api/test_hsa_signal_cas_relaxed.c @@ -0,0 +1,98 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_cas_relaxed. + * + * Purpose: + * Verify that the API works as expected. + * + * Description: + * + * 1) Create a new signal object and call the hsa_signal_cas_relaxed API to exchange the initial value. + * Load the value of the signal to check that the new value is stored in the signal.
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_cas_relaxed() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, expected_value = 100, new_value = 200, observed_value, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + observed_value = hsa_signal_cas_relaxed(signal_handle, expected_value, new_value); + ASSERT_MSG(observed_value == initial_value, "The hsa_signal_cas_relaxed API failed to return the initial value.\n"); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(new_value == loaded_value, "The hsa_signal_cas_relaxed API failed to exchange the new value.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_cas_release.c b/src/core/api/test_hsa_signal_cas_release.c new file mode 100644 index 0000000..7c904e1 --- /dev/null +++ b/src/core/api/test_hsa_signal_cas_release.c @@ -0,0 +1,98 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_cas_release. + * + * Purpose: + * Verify that the API works as expected. + * + * Description: + * + * 1) Create a new signal object and call the hsa_signal_cas_release API to exchange the value. + * Load the value of the signal to check that the new value is stored in the signal.
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_cas_release() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, expected_value = 100, new_value = 200, observed_value, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + observed_value = hsa_signal_cas_release(signal_handle, expected_value, new_value); + ASSERT_MSG(observed_value == initial_value, "The hsa_signal_cas_release API failed to return the initial value.\n"); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(new_value == loaded_value, "The hsa_signal_cas_release API failed to exchange the new value.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_create.c b/src/core/api/test_hsa_signal_create.c new file mode 100644 index 0000000..0e204ba --- /dev/null +++ b/src/core/api/test_hsa_signal_create.c @@ -0,0 +1,184 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include + +/** + * + * Test Name: hsa_signal_create + * + * Purpose: + * Verify that the API works as expected. + * + * Description: + * + * 1) Attempt to create a signal. + * + * 2) Before initializing the runtime, attempt to create a signal. + * Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * 3) Attempt to create a signal passing NULL as the signal pointer.
+ * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + * 4) Attempt to create a signal using 1 for num_consumer value and NULL + * as the consumers list. + * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + * 5) Attempt to create a signal using 2 for num_consumer value and an + * agent list that contains duplicate agents. + * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + */ + +/** + * + * @Brief: + * Implement Description #1. + * + * @Return + * int + * + */ + +int test_hsa_signal_create() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + + hsa_signal_value_t initial_value = 100; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Signal creation failed.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT_MSG(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return + * int + * + */ + +int test_hsa_signal_create_not_initialized() { + hsa_status_t status; + + hsa_signal_value_t initial_value = 100; + hsa_signal_t signal_handle; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_signal_create API didn't return HSA_STATUS_ERROR_NOT_INITIALIZED when called before runtime initialization.\n"); + + return 0; +} + +/** + * + * @Brief: + * Implement description #3 + * + * @Return + * int + * + */ + +int test_hsa_signal_create_null_signal() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_value_t initial_value = 100; + + status = hsa_signal_create(initial_value, 0, NULL, NULL); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_signal_create API didn't return 
HSA_STATUS_ERROR_INVALID_ARGUMENT when called with a NULL signal.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #4 + * + * @Return: + * int + * + */ + +int test_hsa_signal_create_invalid_arg() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_value_t initial_value = 100; + hsa_signal_t signal_handle; + + status = hsa_signal_create(initial_value, 1, NULL, &signal_handle); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_signal_create API didn't return HSA_STATUS_ERROR_INVALID_ARGUMENT when called 1 consumer but a NULL consumer list.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_destroy.c b/src/core/api/test_hsa_signal_destroy.c new file mode 100644 index 0000000..0672528 --- /dev/null +++ b/src/core/api/test_hsa_signal_destroy.c @@ -0,0 +1,180 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_destroy + * + * Purpose: + * Verify that if the API works as expected. + * + * Description: + * + * 1) Create an new signal. + * Destroy the signal and check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Before initialize HSA runtime, call hsa_signal_destroy. + * Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED + * + * 3) Call hsa_signal_destroy an invalid signal. 
+ * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT + * + * 4) Call hsa_signal_destroy using a signal that was previously destroyed. + * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT + * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return + * int + * + */ + +int test_hsa_signal_destroy() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_signal_destroy(signal_handle); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to destroy the signal!\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief:l + * Implement description #2 + * + * @Return + * int + * + */ + +int test_hsa_signal_destroy_not_initialized() { + hsa_status_t status; + + hsa_signal_t signal_handle; + status = hsa_signal_destroy(signal_handle); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_signal_destroy API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when called before runtime initialization.\n"); + + return 0; +} + +/** + * + * @Brief: + * Implement description #3 + * + * @Return + * int + * + */ + +int test_hsa_signal_destroy_invalid_arg() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t invalid_signal; + invalid_signal.handle = 0; + status = hsa_signal_destroy(invalid_signal); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_signal_destroy failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when called with an invalid signal.\n"); + + return 0; +} + + /** + * + * @Brief: + * Implement Description #4 + * + * @Return: + * int + * + */ + +int test_hsa_signal_destroy_invalid_signal() { + hsa_status_t status; + + status = hsa_init(); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Attempt to destroy the signal again + status = hsa_signal_destroy(signal_handle); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_SIGNAL == status, "The hsa_signal_destroy failed to return HSA_STATUS_ERROR_INVALID_SIGNAL when called with an invalid signal.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_exchange_acq_rel.c b/src/core/api/test_hsa_signal_exchange_acq_rel.c new file mode 100644 index 0000000..9f2ee9c --- /dev/null +++ b/src/core/api/test_hsa_signal_exchange_acq_rel.c @@ -0,0 +1,100 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_exchange_acq_rel. + * + * Purpose: + * Verify that if the API works as expected. + * + * Description: + * + * 1) Create an new signal object and exchange the value of the signal. + * Call hsa_signal_load_relaxed and check if the value is correct. 
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_exchange_acq_rel() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_value_t initial_value, prev_value, new_value, loaded_value; + initial_value = 100; + new_value = 200; + + hsa_signal_t signal_handle; + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + prev_value = hsa_signal_exchange_acq_rel(signal_handle, new_value); + ASSERT_MSG(prev_value == initial_value, "The hsa_signal_exchange_acq_rel API didn't return the initial value.\n"); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(new_value == loaded_value, "The hsa_signal_exchange_acq_rel API didn't set the new value.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_exchange_acquire.c b/src/core/api/test_hsa_signal_exchange_acquire.c new file mode 100644 index 0000000..12f70fb --- /dev/null +++ b/src/core/api/test_hsa_signal_exchange_acquire.c @@ -0,0 +1,101 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_exchange_acquire. + * + * Purpose: + * Verify that if the API works as expected. + * + * Description: + * + * 1) Create an new signal object and exchange the value of the signal. + * Call hsa_signal_load_relaxed and check if the value written previously is correct. 
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_exchange_acquire() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_value_t initial_value, prev_value, new_value, loaded_value; + initial_value = 100; + new_value = 200; + + hsa_signal_t signal_handle; + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + prev_value = hsa_signal_exchange_acquire(signal_handle, new_value); + ASSERT_MSG(prev_value == initial_value, "The hsa_signal_exchange_acquire API failed to return the initial value.\n"); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(new_value == loaded_value, "The hsa_signal_exchange_acquire API failed to set the new value.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + diff --git a/src/core/api/test_hsa_signal_exchange_relaxed.c b/src/core/api/test_hsa_signal_exchange_relaxed.c new file mode 100644 index 0000000..e3bb1d9 --- /dev/null +++ b/src/core/api/test_hsa_signal_exchange_relaxed.c @@ -0,0 +1,100 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_exchange_relaxed. + * + * Purpose: + * Verify that if the API works as expected. + * + * Description: + * + * 1) Create an new signal object and exchange value of the signal. + * Call hsa_signal_load_relaxed and check if the value written previously is correct. 
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_exchange_relaxed() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_value_t initial_value, prev_value, new_value, loaded_value; + initial_value = 100; + new_value = 200; + + hsa_signal_t signal_handle; + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + prev_value = hsa_signal_exchange_relaxed(signal_handle, new_value); + ASSERT_MSG(prev_value == initial_value, "Failed to exchange signal value! The values returned is not correct!\n"); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(new_value == loaded_value, "Failed to set signal value!\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_exchange_release.c b/src/core/api/test_hsa_signal_exchange_release.c new file mode 100644 index 0000000..3ffdf9c --- /dev/null +++ b/src/core/api/test_hsa_signal_exchange_release.c @@ -0,0 +1,100 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_exchange_release. + * + * Purpose: + * Verify that if the API works as expected. + * + * Description: + * + * 1) Create a signal and call the hsa_signal_exchange_release + * API to modify the signal value. 
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_exchange_release() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_value_t initial_value, prev_value, new_value, loaded_value; + initial_value = 100; + new_value = 200; + + hsa_signal_t signal_handle; + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + prev_value = hsa_signal_exchange_release(signal_handle, new_value); + ASSERT_MSG(prev_value == initial_value, "The hsa_signal_exchange_release API failed to return the initial value.\n"); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(new_value == loaded_value, "The hsa_signal_exchange_release API failed to set the new value.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_load_acquire.c b/src/core/api/test_hsa_signal_load_acquire.c new file mode 100644 index 0000000..90e1d93 --- /dev/null +++ b/src/core/api/test_hsa_signal_load_acquire.c @@ -0,0 +1,96 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_load_acquire + * + * Purpose: + * Verify if the API works as expected + * + * Description: + * + * Create an new signal, call hsa_signal_load_acquire. + * Check if the signal value loaded is correct. 
+ * + */ + +/** + * + * @Brief: + * implement description + * + * @Return: + * int + * + */ + +int test_hsa_signal_load_acquire() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_value_t loaded_value; + loaded_value = hsa_signal_load_acquire(signal_handle); + ASSERT_MSG(initial_value == loaded_value, "The hsa_signal_load_acquire API failed to load the right signal value.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_load_relaxed.c b/src/core/api/test_hsa_signal_load_relaxed.c new file mode 100644 index 0000000..7f7b964 --- /dev/null +++ b/src/core/api/test_hsa_signal_load_relaxed.c @@ -0,0 +1,96 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_load_relaxed + * + * Purpose: + * Verify that if the API works as expected + * + * Description: + * + * 1) Create an new signal, call hsa_signal_load_relaxed. + * Check if the signal value loaded is correct. 
+ * + */ + + /** + * + * @Brief: + * implement description + * + * @Return: + * int + * + */ + +int test_hsa_signal_load_relaxed() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_value_t loaded_value; + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(initial_value == loaded_value, "The hsa_signal_load_relaxed API failed to load correct signal value!\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_or_acq_rel.c b/src/core/api/test_hsa_signal_or_acq_rel.c new file mode 100644 index 0000000..67a55e1 --- /dev/null +++ b/src/core/api/test_hsa_signal_or_acq_rel.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_or_acq_rel + * + * Purpose: + * Verify that if the API works as expected + * + * Description: + * + * 1) Create a signal and call hsa_signal_or_acq_rel API to modify + * the signal value. 
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_or_acq_rel() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, or_value = 50, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_or_acq_rel(signal_handle, or_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value | or_value), "The hsa_signal_or_acq_rel API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_or_acquire.c b/src/core/api/test_hsa_signal_or_acquire.c new file mode 100644 index 0000000..a5824c3 --- /dev/null +++ b/src/core/api/test_hsa_signal_or_acquire.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_or_acquire + * + * Purpose: + * Verify that if the API works as expected + * + * Description: + * + * 1) Create a new signal and call the hsa_signal_or_acquire API + * to modify the signal value. 
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_or_acquire() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, or_value = 50, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_or_acquire(signal_handle, or_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value | or_value), "The hsa_signal_or_acquire API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_or_relaxed.c b/src/core/api/test_hsa_signal_or_relaxed.c new file mode 100644 index 0000000..40bc800 --- /dev/null +++ b/src/core/api/test_hsa_signal_or_relaxed.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_or_relaxed + * + * Purpose: + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_or_relaxed API + * to modify the signal value.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement description #1: OR a value into a new signal with
+ * hsa_signal_or_relaxed and check the value read back.
+ * @Return:
+ * int - 0 when the end of the test is reached.
+ *
+ */
+
+int test_hsa_signal_or_relaxed() {
+    // Fixed operands: the signal must end up holding initial_value | or_value.
+    hsa_signal_value_t initial_value = 100;
+    hsa_signal_value_t or_value = 50;
+    hsa_signal_t signal_handle;
+
+    // Start the runtime, then create the signal with its starting value.
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Apply the OR and read the result back for comparison.
+    hsa_signal_or_relaxed(signal_handle, or_value);
+    hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+    ASSERT_MSG(loaded_value == (initial_value | or_value), "The hsa_signal_or_relaxed API failed to modify the signal value correctly.\n");
+
+    // Clean up: destroy the signal before shutting the runtime down.
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_signal_or_release.c b/src/core/api/test_hsa_signal_or_release.c
new file mode 100644
index 0000000..b06758c
--- /dev/null
+++ b/src/core/api/test_hsa_signal_or_release.c
@@ -0,0 +1,97 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_or_release + * + * Purpose: + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_or_release API + * to modify the signal value.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement description #1: OR a value into a new signal with
+ * hsa_signal_or_release and check the value read back.
+ * @Return:
+ * int - 0 when the end of the test is reached.
+ *
+ */
+
+int test_hsa_signal_or_release() {
+    // Fixed operands: the signal must end up holding initial_value | or_value.
+    hsa_signal_value_t initial_value = 100;
+    hsa_signal_value_t or_value = 50;
+    hsa_signal_t signal_handle;
+
+    // Start the runtime, then create the signal with its starting value.
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Apply the OR and read the result back for comparison.
+    hsa_signal_or_release(signal_handle, or_value);
+    hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+    ASSERT_MSG(loaded_value == (initial_value | or_value), "The hsa_signal_or_release API failed to modify the signal value correctly.\n");
+
+    // Clean up: destroy the signal before shutting the runtime down.
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_signal_store_relaxed.c b/src/core/api/test_hsa_signal_store_relaxed.c
new file mode 100644
index 0000000..85b0b72
--- /dev/null
+++ b/src/core/api/test_hsa_signal_store_relaxed.c
@@ -0,0 +1,99 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_store_relaxed + * + * Purpose: + * Verify that the API works as expected. + * + * Description: + * + * 1) Create a signal object and set a new value for the signal. + * Load the value of the signal and check that it equals the value written.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement description #1: store a new value with
+ * hsa_signal_store_relaxed and check the value read back.
+ * @Return:
+ * int - 0 when the end of the test is reached.
+ *
+ */
+
+int test_hsa_signal_store_relaxed() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    hsa_signal_value_t initial_value = 100;
+
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t new_value = 200;
+    hsa_signal_store_relaxed(signal_handle, new_value);
+
+    // Compare with ==; the old `=` assigned 200, so the condition was always non-zero.
+    hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+    ASSERT_MSG(loaded_value == new_value, "The hsa_signal_store_relaxed API failed to set the correct value.\n");
+
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_signal_store_release.c b/src/core/api/test_hsa_signal_store_release.c
new file mode 100644
index 0000000..e957dab
--- /dev/null
+++ b/src/core/api/test_hsa_signal_store_release.c
@@ -0,0 +1,99 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_store_release + * + * Purpose: + * Verify that the API works as expected. + * + * Description: + * + * 1) Initialize the HSA runtime, create a new signal object, set a new value on the signal, + * and then load the value of the signal and check that it equals the value written.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement description #1: store a new value with
+ * hsa_signal_store_release and check the value read back.
+ * @Return:
+ * int - 0 when the end of the test is reached.
+ *
+ */
+
+int test_hsa_signal_store_release() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    hsa_signal_value_t initial_value = 100;
+
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t new_value = 200;
+    hsa_signal_store_release(signal_handle, new_value);
+
+    // Compare with ==; the old `=` assigned 200, so the condition was always non-zero.
+    hsa_signal_value_t loaded_value = hsa_signal_load_acquire(signal_handle);
+    ASSERT_MSG(loaded_value == new_value, "The hsa_signal_store_release API failed to set the correct value.\n");
+
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_signal_subtract_acq_rel.c b/src/core/api/test_hsa_signal_subtract_acq_rel.c
new file mode 100644
index 0000000..0b0e9f4
--- /dev/null
+++ b/src/core/api/test_hsa_signal_subtract_acq_rel.c
@@ -0,0 +1,97 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_subtract_acq_rel. + * Purpose: + * + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_subtract_acq_rel + * API to modify the signal value.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement description #1: subtract a value from a new signal with
+ * hsa_signal_subtract_acq_rel and check the value read back.
+ * @Return:
+ * int - 0 when the end of the test is reached.
+ *
+ */
+
+int test_hsa_signal_subtract_acq_rel() {
+    // Fixed operands: the signal must end up holding initial_value - substracted_value.
+    hsa_signal_value_t initial_value = 100;
+    hsa_signal_value_t substracted_value = 50;
+    hsa_signal_t signal_handle;
+
+    // Start the runtime, then create the signal with its starting value.
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Apply the subtraction and read the result back for comparison.
+    hsa_signal_subtract_acq_rel(signal_handle, substracted_value);
+    hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+    ASSERT_MSG(loaded_value == (initial_value - substracted_value), "The hsa_signal_subtract_acq_rel failed to modify the signal value correctly.\n");
+
+    // Clean up: destroy the signal before shutting the runtime down.
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_signal_subtract_acquire.c b/src/core/api/test_hsa_signal_subtract_acquire.c
new file mode 100644
index 0000000..7fe7190
--- /dev/null
+++ b/src/core/api/test_hsa_signal_subtract_acquire.c
@@ -0,0 +1,97 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_subtract_acquire. + * Purpose: + * + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_subtract_acquire API + * to modify the signal value.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement description #1: subtract a value from a new signal with
+ * hsa_signal_subtract_acquire and check the value read back.
+ * @Return:
+ * int - 0 when the end of the test is reached.
+ *
+ */
+
+int test_hsa_signal_subtract_acquire() {
+    // Fixed operands: the signal must end up holding initial_value - substracted_value.
+    hsa_signal_value_t initial_value = 100;
+    hsa_signal_value_t substracted_value = 50;
+    hsa_signal_t signal_handle;
+
+    // Start the runtime, then create the signal with its starting value.
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Apply the subtraction and read the result back for comparison.
+    hsa_signal_subtract_acquire(signal_handle, substracted_value);
+    hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+    ASSERT_MSG(loaded_value == (initial_value - substracted_value), "The hsa_signal_subtract_acquire API failed to modify the signal value correctly.\n");
+
+    // Clean up: destroy the signal before shutting the runtime down.
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_signal_subtract_relaxed.c b/src/core/api/test_hsa_signal_subtract_relaxed.c
new file mode 100644
index 0000000..2c52c96
--- /dev/null
+++ b/src/core/api/test_hsa_signal_subtract_relaxed.c
@@ -0,0 +1,97 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_subtract_relaxed. + * Purpose: + * + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_subtract_relaxed API + * to modify the signal value.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement description #1: subtract a value from a new signal with
+ * hsa_signal_subtract_relaxed and check the value read back.
+ * @Return:
+ * int - 0 when the end of the test is reached.
+ *
+ */
+
+int test_hsa_signal_subtract_relaxed() {
+    // Fixed operands: the signal must end up holding initial_value - substracted_value.
+    hsa_signal_value_t initial_value = 100;
+    hsa_signal_value_t substracted_value = 50;
+    hsa_signal_t signal_handle;
+
+    // Start the runtime, then create the signal with its starting value.
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Apply the subtraction and read the result back for comparison.
+    hsa_signal_subtract_relaxed(signal_handle, substracted_value);
+    hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+    ASSERT_MSG(loaded_value == (initial_value - substracted_value), "The hsa_signal_subtract_relaxed API failed to modify the signal value correctly.\n");
+
+    // Clean up: destroy the signal before shutting the runtime down.
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_signal_subtract_release.c b/src/core/api/test_hsa_signal_subtract_release.c
new file mode 100644
index 0000000..3d7be93
--- /dev/null
+++ b/src/core/api/test_hsa_signal_subtract_release.c
@@ -0,0 +1,97 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_subtract_release. + * Purpose: + * + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_subtract_release API to + * modify the signal value.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement description #1: subtract a value from a new signal with
+ * hsa_signal_subtract_release and check the value read back.
+ * @Return:
+ * int - 0 when the end of the test is reached.
+ *
+ */
+
+int test_hsa_signal_subtract_release() {
+    // Fixed operands: the signal must end up holding initial_value - substracted_value.
+    hsa_signal_value_t initial_value = 100;
+    hsa_signal_value_t substracted_value = 50;
+    hsa_signal_t signal_handle;
+
+    // Start the runtime, then create the signal with its starting value.
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Apply the subtraction and read the result back for comparison.
+    hsa_signal_subtract_release(signal_handle, substracted_value);
+    hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+    ASSERT_MSG(loaded_value == (initial_value - substracted_value), "The hsa_signal_subtract_release failed to modify the signal value correctly.\n");
+
+    // Clean up: destroy the signal before shutting the runtime down.
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_signal_xor_acq_rel.c b/src/core/api/test_hsa_signal_xor_acq_rel.c
new file mode 100644
index 0000000..db6f490
--- /dev/null
+++ b/src/core/api/test_hsa_signal_xor_acq_rel.c
@@ -0,0 +1,97 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_xor_acq_rel + * + * Purpose: + * Verify that the API works as expected + * + * Description: + * + * 1) Create a new signal and call the hsa_signal_xor_acq_rel + * API to modify the signal value.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement description #1: XOR a value into a new signal with
+ * hsa_signal_xor_acq_rel and check the value read back.
+ * @Return:
+ * int - 0 when the end of the test is reached.
+ *
+ */
+
+int test_hsa_signal_xor_acq_rel() {
+    // Fixed operands: the signal must end up holding initial_value ^ xor_value.
+    hsa_signal_value_t initial_value = 100;
+    hsa_signal_value_t xor_value = 50;
+    hsa_signal_t signal_handle;
+
+    // Start the runtime, then create the signal with its starting value.
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Apply the XOR and read the result back for comparison.
+    hsa_signal_xor_acq_rel(signal_handle, xor_value);
+    hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+    ASSERT_MSG(loaded_value == (initial_value ^ xor_value), "The hsa_signal_xor_acq_rel API failed to modify the signal value correctly.\n");
+
+    // Clean up: destroy the signal before shutting the runtime down.
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/api/test_hsa_signal_xor_acquire.c b/src/core/api/test_hsa_signal_xor_acquire.c
new file mode 100644
index 0000000..3a0f6d3
--- /dev/null
+++ b/src/core/api/test_hsa_signal_xor_acquire.c
@@ -0,0 +1,97 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_xor_acquire + * + * Purpose: + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and call the hsa_signal_xor_acquire API to + * modify the signal's value.
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_xor_acquire() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, xor_value = 50, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_xor_acquire(signal_handle, xor_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value ^ xor_value), "The hsa_signal_xor_acquire API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_xor_relaxed.c b/src/core/api/test_hsa_signal_xor_relaxed.c new file mode 100644 index 0000000..034d59c --- /dev/null +++ b/src/core/api/test_hsa_signal_xor_relaxed.c @@ -0,0 +1,98 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_xor_relaxed + * + * Purpose: + * Verify that the API works as expected + * + * Description: + * + * 1) Create a new signal and call the hsa_signal_xor_relaxed API to + * modify the signal value.
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_xor_relaxed() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, xor_value = 50, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_xor_relaxed(signal_handle, xor_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + + ASSERT_MSG(loaded_value == (initial_value ^ xor_value), "The hsa_signal_xor_relaxed API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_signal_xor_release.c b/src/core/api/test_hsa_signal_xor_release.c new file mode 100644 index 0000000..46dc57f --- /dev/null +++ b/src/core/api/test_hsa_signal_xor_release.c @@ -0,0 +1,97 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +/** + * + * Test Name: hsa_signal_xor_release + * + * Purpose: + * Verify that the API works as expected + * + * Description: + * + * 1) Create a signal and then call the hsa_signal_xor_release API + * to modify the signal value.
+ * + */ + +/** + * + * @Brief: + * Implement description #1 + * + * @Return: + * int + * + */ + +int test_hsa_signal_xor_release() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 100, xor_value = 50, loaded_value; + + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_xor_release(signal_handle, xor_value); + + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(loaded_value == (initial_value ^ xor_value), "The hsa_signal_xor_release API failed to modify the signal value correctly.\n"); + + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_soft_queue_create.c b/src/core/api/test_hsa_soft_queue_create.c new file mode 100644 index 0000000..e9996fa --- /dev/null +++ b/src/core/api/test_hsa_soft_queue_create.c @@ -0,0 +1,181 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/** + * + * Test Name: hsa_queue_create + * Purpose: Verify that API of hsa_queue_create() works as expected + * + * Description: + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + + +int test_hsa_soft_queue_create() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + const uint32_t queue_size = 32; + + // Find an agent that supports agent dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_cpu_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Find memory region that is accessible from this agent + hsa_region_t region; + region.handle = (uint64_t)-1; + status = hsa_agent_iterate_regions(agent, + callback_get_region_global_allocatable, ®ion); + + // Use this region to create a soft queue + // no error should occur + hsa_queue_t* queue; + hsa_signal_t doorbell_signal; + doorbell_signal.handle = (uint64_t)-1; + status = hsa_signal_create(0, 0, NULL, &doorbell_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_soft_queue_create(region, queue_size, + HSA_QUEUE_TYPE_SINGLE, HSA_QUEUE_FEATURE_KERNEL_DISPATCH, + doorbell_signal, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + // Verify all the queue members are consistent with the way it was created: + // size, type, features, doorbell_signal + ASSERT(queue->size == queue_size); + ASSERT(queue->type == HSA_QUEUE_TYPE_SINGLE); + ASSERT(queue->features & HSA_QUEUE_FEATURE_KERNEL_DISPATCH); + ASSERT(0 == (queue->features & HSA_QUEUE_FEATURE_AGENT_DISPATCH)); + ASSERT(queue->doorbell_signal.handle == doorbell_signal.handle); + // Verify that both read index and write index are initialized to 0 + uint64_t read_index = hsa_queue_load_read_index_relaxed(queue); + ASSERT(0 == read_index); + uint64_t write_index = hsa_queue_load_write_index_relaxed(queue); + ASSERT(0 == write_index); + // Verify that the header of each packet that is reserved by the queue is + // 
initialized as HSA_PACKET_TYPE_INVALID + uint16_t packet_type_bits_mask = (1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1; + hsa_kernel_dispatch_packet_t* packets = (hsa_kernel_dispatch_packet_t*)queue->base_address; + uint32_t i; + for (i = 0; i < queue_size; ++i) { + uint16_t packet_type = packets[i].header & packet_type_bits_mask; + ASSERT(HSA_PACKET_TYPE_INVALID == packet_type); + } + // Destroy the soft queue + status = hsa_queue_destroy(queue); + + + // Create a soft queue with invalid parameters: + // (1) queue_size is not power of 2 + status = hsa_soft_queue_create(region, queue_size + 1, + HSA_QUEUE_TYPE_SINGLE, HSA_QUEUE_FEATURE_KERNEL_DISPATCH, + doorbell_signal, &queue); + ASSERT(HSA_STATUS_ERROR_INVALID_ARGUMENT == status); + + // (2) queue_size is 0; + status = hsa_soft_queue_create(region, 0, + HSA_QUEUE_TYPE_SINGLE, HSA_QUEUE_FEATURE_KERNEL_DISPATCH, + doorbell_signal, &queue); + ASSERT(HSA_STATUS_ERROR_INVALID_ARGUMENT == status); + + // (3) type is an invalid type + // status = hsa_soft_queue_create(region, queue_size + // HSA_QUEUE_TYPE_SINGLE + 1, HSA_QUEUE_FEATURE_KERNEL_DISPATCH, + // doorbell_signal, &queue); + // ASSERT(HSA_STATUS_ERROR_INVALID_ARGUMENT == status); + + // (4) doorbell handle is 0 + hsa_signal_t invalid_signal; + invalid_signal.handle = 0; + status = hsa_soft_queue_create(region, queue_size, + HSA_QUEUE_TYPE_SINGLE, HSA_QUEUE_FEATURE_KERNEL_DISPATCH, + invalid_signal, &queue); + ASSERT(HSA_STATUS_ERROR_INVALID_ARGUMENT == status); + + // (5) queue is NULL + status = hsa_soft_queue_create(region, queue_size, + HSA_QUEUE_TYPE_SINGLE, HSA_QUEUE_FEATURE_KERNEL_DISPATCH, + doorbell_signal, NULL); + ASSERT(HSA_STATUS_ERROR_INVALID_ARGUMENT == status); + + // Create soft queue until out-of-resource + uint32_t queues_max = 65536; + hsa_queue_t** queues = (hsa_queue_t**)malloc(sizeof(hsa_queue_t*) * queues_max); + uint32_t queues_created = 0; + for (i = 0; i < queues_max; ++i) { + status = hsa_soft_queue_create(region, queue_size, + 
HSA_QUEUE_TYPE_SINGLE, HSA_QUEUE_FEATURE_KERNEL_DISPATCH, + doorbell_signal, &queues[i]); + if (HSA_STATUS_SUCCESS == status) { + ++queues_created; + } else if (HSA_STATUS_ERROR_OUT_OF_RESOURCES != status) { + // unexpected error occurred + ASSERT(0); + } else {} + } + + for (i = 0; i < queues_created; ++i) { + status = hsa_queue_destroy(queues[i]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(queues); + + status = hsa_signal_destroy(doorbell_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/api/test_hsa_status_string.c b/src/core/api/test_hsa_status_string.c new file mode 100644 index 0000000..b5f7ce0 --- /dev/null +++ b/src/core/api/test_hsa_status_string.c @@ -0,0 +1,199 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
/*
 * STATUS_STRING_TEST(status_code)
 *
 * Calls hsa_status_string() with the given status code and asserts that the
 * call returns HSA_STATUS_SUCCESS and stores a non-NULL string.
 *
 * The code passed by the caller is the one that is queried, and its
 * spelling is spliced into the failure message with the # operator (which
 * only works outside of a string literal, hence the adjacent literals).
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement when followed by a semicolon.
 */
#define STATUS_STRING_TEST(status_code) \
do { \
    hsa_status_t s; \
    const char* status_string = NULL; \
    s = hsa_status_string((status_code), &status_string); \
    ASSERT_MSG(HSA_STATUS_SUCCESS == s && NULL != status_string, \
               "Failed to return proper value for " #status_code "!\n"); \
} while (0)
+ * + */ + +/** + * + * @Brief: + * Implement Description #1 + * + * @Return + * int + * + */ + +int test_hsa_status_string() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + STATUS_STRING_TEST(HSA_STATUS_SUCCESS); + STATUS_STRING_TEST(HSA_STATUS_INFO_BREAK); + STATUS_STRING_TEST(HSA_STATUS_ERROR); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_ARGUMENT); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_QUEUE_CREATION); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_ALLOCATION); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_AGENT); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_REGION); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_SIGNAL); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_QUEUE); + STATUS_STRING_TEST(HSA_STATUS_ERROR_OUT_OF_RESOURCES); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_PACKET_FORMAT); + STATUS_STRING_TEST(HSA_STATUS_ERROR_RESOURCE_FREE); + STATUS_STRING_TEST(HSA_STATUS_ERROR_NOT_INITIALIZED); + STATUS_STRING_TEST(HSA_STATUS_ERROR_REFCOUNT_OVERFLOW); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_INDEX); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_ISA); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_ISA_NAME); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_CODE_OBJECT); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_EXECUTABLE); + STATUS_STRING_TEST(HSA_STATUS_ERROR_FROZEN_EXECUTABLE); + STATUS_STRING_TEST(HSA_STATUS_ERROR_INVALID_SYMBOL_NAME); + STATUS_STRING_TEST(HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED); + STATUS_STRING_TEST(HSA_STATUS_ERROR_VARIABLE_UNDEFINED); + STATUS_STRING_TEST(HSA_STATUS_ERROR_EXCEPTION); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return + * int + * + */ + +int test_hsa_status_string_not_initialized() { + hsa_status_t status; + const char* status_string = NULL; + + status = hsa_status_string(HSA_STATUS_SUCCESS, 
&status_string); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_status_string API failed to return proper value when the runtime was not initialized.\n"); + return 0; +} + +/** + * + * @Brief: + * Implement Description #3 + * + * @Return + * int + * + */ + +int test_hsa_status_string_invalid_status() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + const char* status_string = NULL; + + status = hsa_status_string((uint32_t) -1, &status_string); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "Calling hsa_status_string() with an invalid status code didn't return HSA_STATUS_ERROR_INVALID_ARGUMENT.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #4 + * + * @Return: + * int + * + */ + +int test_hsa_status_string_invalid_ptr() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_status_string(HSA_STATUS_SUCCESS, NULL); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "Calling hsa_status_string() with an NULL string pointer didn't return HSA_STATUS_ERROR_INVALID_ARGUMENT.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_system_extension_supported.c b/src/core/api/test_hsa_system_extension_supported.c new file mode 100644 index 0000000..930af11 --- /dev/null +++ b/src/core/api/test_hsa_system_extension_supported.c @@ -0,0 +1,171 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include + +/** + * + * Test Name: hsa_system_extension_supported + * + * Purpose: + * Verify that the hsa_system_extension_supported API + * works as expected. + * + * Description: + * + * 1) After initializing the hsa Runtime, call the hsa_system_extension_supported API with a + * known extension.
Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Before initializing the hsa Runtime, call hsa_system_extension_supported API. + * Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * 3) Call hsa_system_extension_supported API with an invalid extension. + * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + * 4) Call hsa_system_extension_supported API with a NULL result parameter. + * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + */ + +/** + * + * @Brief: + * Implement Description #1 + * + * @Return + * int + * + */ + +int test_hsa_system_extension_supported() { + bool result; + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_system_extension_supported(HSA_EXTENSION_IMAGES, 1, 0, &result); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return + * int + * + */ + +int test_hsa_system_extension_supported_not_initialized() { + bool result; + hsa_status_t status; + + status = hsa_system_extension_supported(HSA_EXTENSION_IMAGES, 1, 0, &result); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_system_extension_supported API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime was not initialized.\n"); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #3 + * + * @Return + * int + * + */ + +int test_hsa_system_extension_supported_invalid_extension() { + bool result; + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_system_extension_supported(-1, 1, 0, &result); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_system_extension_supported API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when an invalid extension was specified.\n"); + + status = hsa_shut_down(); + + ASSERT(HSA_STATUS_SUCCESS 
== status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #4 + * + * @Return: + * int + * + */ + +int test_hsa_system_extension_supported_null_result_ptr() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_system_extension_supported(HSA_EXTENSION_IMAGES, 1, 0, NULL); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_system_extension_supported API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when a NULL result pointer was used.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_system_get_extension_table.c b/src/core/api/test_hsa_system_get_extension_table.c new file mode 100644 index 0000000..3ec8af8 --- /dev/null +++ b/src/core/api/test_hsa_system_get_extension_table.c @@ -0,0 +1,174 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include + +/** + * + * Test Name: hsa_system_get_extension_table + * + * Purpose: + * Verify that the hsa_system_get_extension_table API + * works as expected. + * + * Description: + * + * 1) After initializing the hsa Runtime, call the + * hsa_system_get_extension_table API using a known + * extension. Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Before initializing the hsa Runtime, call the hsa_system_get_extension_table API. + * Check if the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * 3) Call the hsa_system_get_extension_table API with an invalid + * extension. Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + * 4) Call the hsa_system_get_extension_table API with a NULL table + * parameter.
Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT + * + */ + +/** + * + * @Brief: + * Implement Description #1 + * + * @Return + * int + * + */ + +int test_hsa_system_get_extension_table() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + + hsa_ext_images_1_00_pfn_t images_table; + status = hsa_system_get_extension_table(HSA_EXTENSION_IMAGES, 1, 0, &images_table); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get the hsa_ext_images extension's function table.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return + * int + * + */ + +int test_hsa_system_get_extension_table_not_initialized() { + bool result; + hsa_status_t status; + + hsa_ext_images_1_00_pfn_t images_table; + status = hsa_system_get_extension_table(HSA_EXTENSION_IMAGES, 1, 0, &images_table); + ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "The hsa_system_get_extension_table API failed to return HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime was not initialized.\n"); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #3 + * + * @Return + * int + * + */ + +int test_hsa_system_get_extension_table_invalid_extension() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + int table; + status = hsa_system_extension_supported(-1, 1, 0, (void*) &table); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_system_get_extension_table API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when an invalid extension was specified.\n"); + + status = hsa_shut_down(); + + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #4 + * + * @Return: + * int + * + */ + +int test_hsa_system_get_extension_table_null_table_ptr() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = 
hsa_system_get_extension_table(HSA_EXTENSION_IMAGES, 1, 0, NULL); + ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "The hsa_system_get_extension_table API failed to return HSA_STATUS_ERROR_INVALID_ARGUMENT when a NULL table pointer was used.\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/api/test_hsa_system_get_info.c b/src/core/api/test_hsa_system_get_info.c new file mode 100644 index 0000000..336d0b4 --- /dev/null +++ b/src/core/api/test_hsa_system_get_info.c @@ -0,0 +1,222 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include + +/** + * + * Test Name: hsa_system_get_info + * + * Purpose: + * Verify that the API works as expected + * + * Description: + * + * 1) After init HsaRt, call hsa_system_get_info to examine every system attribute, + * Check if the return value is HSA_STATUS_SUCCESS. + * + * 2) Before init HsaRt, call hsa_system_get_info, and check if the return value + * is HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * 3) After init HsaRt, call hsa_system_get_info, and pass an invalid hsa_system_info_t value + * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT. + * + * 4) After init HsaRt, call hsa_system_get_info, and pass a NULL pointer to void* + * Check if the return value is HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ */
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1: after hsa_init, query each system attribute
+ * listed below and expect HSA_STATUS_SUCCESS for every query.
+ *
+ * @Return:
+ * int - 0 (the only value this function returns directly)
+ *
+ */
+
+int test_hsa_system_get_info() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Each attribute is read into a variable scoped to its own block; only
+    // the returned status is checked, not the value.
+
+    // Get info of HSA_SYSTEM_INFO_VERSION_MAJOR
+    {
+        uint16_t major_version = 0;
+        status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &major_version);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get major version info.\n");
+    }
+
+    // Get info of HSA_SYSTEM_INFO_VERSION_MINOR
+    {
+        uint16_t minor_version = 0;
+        status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &minor_version);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get minor version info\n");
+    }
+
+    // Get info of HSA_SYSTEM_INFO_TIMESTAMP
+    {
+        uint64_t time_stamp = 0;
+        status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &time_stamp);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get time stamp.\n");
+    }
+
+    // Get info of HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY
+    {
+        // The HSA specification defines this attribute as uint64_t; a
+        // narrower variable would be overrun when the runtime stores it.
+        uint64_t frequency = 0;
+        status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &frequency);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get time stamp frequency.\n");
+    }
+
+    // Get info of HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT
+    {
+        uint64_t max_wait = 0;
+        status = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, &max_wait);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get signal max wait.\n");
+    }
+
+    // Get info of HSA_SYSTEM_INFO_ENDIANNESS
+    {
+        hsa_endianness_t endianness;
+        status = hsa_system_get_info(HSA_SYSTEM_INFO_ENDIANNESS, &endianness);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get endianness.\n");
+    }
+
+    // Get info of HSA_SYSTEM_INFO_MACHINE_MODEL
+    {
+        hsa_machine_model_t machine_model;
+        status = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &machine_model);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get machine_model.\n");
+    }
+
+    // Get info of HSA_SYSTEM_INFO_EXTENSIONS
+    {
+        // 128-byte buffer for the extensions attribute.
+        uint8_t extensions[128];
+        status = hsa_system_get_info(HSA_SYSTEM_INFO_EXTENSIONS, &extensions);
+        ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get extensions.\n");
+    }
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #2: query an attribute without a prior hsa_init
+ * and expect HSA_STATUS_ERROR_NOT_INITIALIZED.
+ *
+ * @Return:
+ * int - 0 (the only value this function returns directly)
+ *
+ */
+
+int test_hsa_system_get_info_not_initialized() {
+    hsa_status_t status;
+
+    // hsa_init() is deliberately not called in this test.
+    uint16_t major_version = 0;
+    status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &major_version);
+    ASSERT_MSG(HSA_STATUS_ERROR_NOT_INITIALIZED == status, "Failed to return proper value when calling hsa_system_get_info before initialization.\n");
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #3: pass an attribute value that is expected to be
+ * invalid and check for HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ * @Return:
+ * int - 0 (the only value this function returns directly)
+ *
+ */
+
+int test_hsa_system_get_info_invalid_attribute() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Pass the integer 10 as the attribute.
+    // NOTE(review): assumes 10 is not a defined hsa_system_info_t value in
+    // the runtime under test - confirm against its hsa.h.
+
+    uint64_t test = 0;
+    status = hsa_system_get_info(10, &test);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "Failed to return proper value when passing invalid attribute to hsa_system_get_info.\n");
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4: pass NULL as the output pointer and expect
+ * HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ * @Return:
+ * int - 0 (the only value this function returns directly)
+ *
+ */
+
+int test_hsa_system_get_info_invalid_ptr() {
+    hsa_status_t status;
+
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Valid attribute, but no storage for the result.
+    status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, NULL);
+    ASSERT_MSG(HSA_STATUS_ERROR_INVALID_ARGUMENT == status, "Failed to return proper value when passing NULL.\n");
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/aql/hsa_aql.c b/src/core/aql/hsa_aql.c
new file mode 100644
index 0000000..b3fa8e2
--- /dev/null
+++ b/src/core/aql/hsa_aql.c
@@ -0,0 +1,81 @@
+/*
+ * =============================================================================
+ *
HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+#include
+#include
+#include "hsa_aql.h"
+
+// One DEFINE_TEST per AQL test function declared in hsa_aql.h.
+// NOTE(review): DEFINE_TEST/ADD_TEST are not defined in this file; presumably
+// DEFINE_TEST(x) wraps test_x() as a test case - confirm in the framework.
+DEFINE_TEST(aql_launch_size)
+DEFINE_TEST(aql_barrier_bit_not_set)
+DEFINE_TEST(aql_barrier_bit_set)
+DEFINE_TEST(aql_barrier_cross_queue_dependency)
+DEFINE_TEST(aql_barrier_cross_queue_dependency_negative_value)
+DEFINE_TEST(aql_barrier_multiple_barriers)
+DEFINE_TEST(aql_barrier_and)
+DEFINE_TEST(aql_barrier_or)
+DEFINE_TEST(aql_group_memory)
+DEFINE_TEST(aql_group_memory_overspecified)
+DEFINE_TEST(aql_private_memory)
+DEFINE_TEST(aql_private_memory_overspecified)
+DEFINE_TEST(aql_zero_wg_size)
+
+// Entry point of the AQL test executable: sets up the suite, registers every
+// test defined above in the same order, then runs them.
+int main(int argc, char* argv[]) {
+    INITIALIZE_TESTSUITE();
+    // Every registration is terminated with ';' for consistency.
+    ADD_TEST(aql_launch_size);
+    ADD_TEST(aql_barrier_bit_not_set);
+    ADD_TEST(aql_barrier_bit_set);
+    ADD_TEST(aql_barrier_cross_queue_dependency);
+    ADD_TEST(aql_barrier_cross_queue_dependency_negative_value);
+    ADD_TEST(aql_barrier_multiple_barriers);
+    ADD_TEST(aql_barrier_and);
+    ADD_TEST(aql_barrier_or);
+    ADD_TEST(aql_group_memory);
+    ADD_TEST(aql_group_memory_overspecified);
+    ADD_TEST(aql_private_memory);
+    ADD_TEST(aql_private_memory_overspecified);
+    ADD_TEST(aql_zero_wg_size);
+    RUN_TESTS();
+}
+
diff --git a/src/core/aql/hsa_aql.h b/src/core/aql/hsa_aql.h
new file mode 100644
index 0000000..d92cad4
--- /dev/null
+++ b/src/core/aql/hsa_aql.h
@@ -0,0 +1,61 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+#ifndef _HSA_AQL_H_
+#define _HSA_AQL_H_
+
+/* Entry points of the AQL tests, listed in the order hsa_aql.c registers them. */
+int test_aql_launch_size();
+int test_aql_barrier_bit_not_set();
+int test_aql_barrier_bit_set();
+int test_aql_barrier_cross_queue_dependency();
+int test_aql_barrier_cross_queue_dependency_negative_value();
+int test_aql_barrier_multiple_barriers();
+int test_aql_barrier_and();
+int test_aql_barrier_or();
+int test_aql_group_memory();
+int test_aql_group_memory_overspecified();
+int test_aql_private_memory();
+int test_aql_private_memory_overspecified();
+int test_aql_zero_wg_size();
+
+#endif // _HSA_AQL_H_
diff --git a/src/core/aql/test_aql_barrier_and.c b/src/core/aql/test_aql_barrier_and.c
new file mode 100644
index 0000000..55fef91
--- /dev/null
+++ b/src/core/aql/test_aql_barrier_and.c
@@ -0,0 +1,245 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: aql_barrier_AND + * Scope: Conformance + * + * Purpose: Verifies that a Barrier-AND packet allows an + * application to specify up to five signal dependencies + * and requires the packet processor to resolve those dependencies + * before it can proceed. + * + * Test Description: + * 1) Generate a list of agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) Load and initialize the no-op kernel. + * 3) Select an agent from the list and create a queue. + * 4) Create 6 signals associated with the agent (or all agents), and + * initialize all signals to have a value of 1. + * 5) Enqueue a barrier packet to the queue and use n of the signals + * as dependencies. The variable n has a value from 1 to 5. + * 6) Enqueue a dispatch packet that executes the no-op kernel and uses + * the final signal as the completion signal. + * 7) Check the completion signal of the no-op kernel, and verify that + * it has not decremented. 
+ * 8) Set one of the barrier's dependent signal values to 0 and repeat
+ * step 7 until all n signals have been decremented.
+ * 9) Wait until the kernel's completion signal has decremented.
+ *
+ * Expected results: The kernel's completion signal should not decrement until
+ * all of the barrier's dependent signals have decremented.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Runs the Barrier-AND scenario described above on every agent that
+ * supports kernel dispatch. Returns 0; this is the only value the function
+ * itself returns.
+ */
+int test_aql_barrier_and() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // The signal wait timeout is a duration expressed in system timestamp
+    // ticks, so the timestamp frequency (ticks per second) is used below as
+    // a one second timeout.
+    uint64_t one_second_timeout = 0;
+    status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &one_second_timeout);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("no_op.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Check if the queue supports dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Create the queue
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        char* symbol_names[1];
+        symbol_names[0] = "&__no_op_kernel";
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create six signals, all with an initial value of 1:
+        // signals[0..4] are the barrier dependencies, signals[5] is the
+        // completion signal of the no-op kernel.
+        int jj;
+        hsa_signal_t signals[6];
+        for (jj = 0; jj < 6; ++jj) {
+            status = hsa_signal_create((hsa_signal_value_t) 1, 0, NULL, &signals[jj]);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+        }
+
+        // The dispatch packet size is used as the slot size for both packets.
+        const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t);
+
+        // Create and enqueue a barrier packet. The slot is located with byte
+        // arithmetic on a char pointer, wrapping the packet id by the queue
+        // size.
+        uint64_t packet_id = hsa_queue_add_write_index_relaxed(queue, 1);
+        hsa_barrier_and_packet_t* barrier_packet = (hsa_barrier_and_packet_t*)((char*) queue->base_address + (packet_id % queue->size) * packet_size);
+        memset(barrier_packet, 0, packet_size);
+
+        // Add dependency signals to the barrier packet
+        for (jj = 0; jj < 5; ++jj) {
+            barrier_packet->dep_signal[jj] = signals[jj];
+        }
+
+        uint16_t header = 0;
+        header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+        header |= HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
+
+        // The header is written last, with release ordering, after the rest
+        // of the packet has been filled in.
+        __atomic_store_n((uint16_t*)(&barrier_packet->header), header, __ATOMIC_RELEASE);
+
+        // Signal the door bell to launch the packet
+        hsa_signal_store_release(queue->doorbell_signal, packet_id);
+
+        // Create and enqueue a dispatch kernel
+        packet_id = hsa_queue_add_write_index_relaxed(queue, 1);
+        hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)((char*) queue->base_address + (packet_id % queue->size) * packet_size);
+        memset(dispatch_packet, 0, packet_size);
+        dispatch_packet->completion_signal = signals[5];
+        dispatch_packet->setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+        dispatch_packet->workgroup_size_x = 256;
+        dispatch_packet->workgroup_size_y = 1;
+        dispatch_packet->workgroup_size_z = 1;
+        dispatch_packet->grid_size_x = 256;
+        dispatch_packet->grid_size_y = 1;
+        dispatch_packet->grid_size_z = 1;
+        dispatch_packet->group_segment_size = symbol_record.group_segment_size;
+        dispatch_packet->private_segment_size = symbol_record.private_segment_size;
+        dispatch_packet->kernel_object = symbol_record.kernel_object;
+
+        header = 0;
+        header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+        header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+        header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+
+        __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE);
+
+        // Signal the door bell to launch the packet
+        hsa_signal_store_release(queue->doorbell_signal, packet_id);
+
+        // Decrement signal 1-4, the barrier packet should not complete until
+        // the 5th signal is triggered
+        for (jj = 0; jj < 4; ++jj) {
+            hsa_signal_store_release(signals[jj], 0);
+            // Wait up to about one second; the value returned by the wait is
+            // what gets checked.
+            if (0 == hsa_signal_wait_relaxed(signals[5], HSA_SIGNAL_CONDITION_EQ, 0, one_second_timeout, HSA_WAIT_STATE_BLOCKED)) {
+                // signals[5] should never be decremented
+                ASSERT(0);
+            }
+        }
+
+        // Decrement signal 5
+        hsa_signal_store_release(signals[4], 0);
+        // The barrier packet should be processed.
+
+        // Wait for the no_op kernel to finish
+        while (0 != hsa_signal_wait_relaxed(signals[5], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED));
+
+        // Destroy all signals
+        for (jj = 0; jj < 6; ++jj) {
+            status = hsa_signal_destroy(signals[jj]);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+        }
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/aql/test_aql_barrier_bit_not_set.c b/src/core/aql/test_aql_barrier_bit_not_set.c
new file mode 100644
index 0000000..dbf0156
--- /dev/null
+++ b/src/core/aql/test_aql_barrier_bit_not_set.c
@@ -0,0 +1,305 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * + * Test Name: aql_barrier_bit_not_set + * Scope: Conformance + * + * Purpose: Verifies that if a dispatch packet does not have the barrier bit + * set, the preceding packets do not have to complete before the dispatch + * packet executes. + * + * Test Description: + * 1) Generate a list of agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) Load and initialize the signal_st_rlx and the signal_wait_rlx kernels.
+ * 3) Select an agent from the list and create a queue.
+ * 4) Create a test signal with an initial value of 1.
+ * 5) Create several completion signals for dispatches.
+ * 6) Enqueue several dispatch packets to the queue that have the barrier bit
+ * set to 0, and that all execute the signal_wait_rlx kernel on the created
+ * test signal and wait for it to have a value of 1. Each should have its
+ * own completion signal.
+ * 7) Enqueue a signal dispatch packet that has the barrier bit set to 0,
+ * and that executes the signal_st_rlx kernel on the created test signal, setting
+ * the value to 0. It should also have its own completion signal.
+ * 8) Wait on the completion signal of the set kernel.
+ * 9) Finally, wait on each of the completion signals of the wait kernels.
+ *
+ * Expected results: None of the wait kernels should block the completion of the
+ * set kernel.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_WAIT_KERNELS 8
+
+/*
+ * Runs the scenario described above on every agent that supports kernel
+ * dispatch, multi-producer queues and fine grained global memory; other
+ * agents are skipped. Returns 0; this is the only value the function itself
+ * returns.
+ */
+int test_aql_barrier_bit_not_set() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("signal_operations.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Check if the queue supports dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Check if a queue on this agent support HSA_QUEUE_TYPE_MULTI
+        hsa_queue_type_t queue_type;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (HSA_QUEUE_TYPE_MULTI != queue_type) {
+            continue;
+        }
+
+        // Find the global memory region. The handle is preset to the
+        // sentinel (uint64_t)-1; it still holds the sentinel afterwards when
+        // no fine grained region was found.
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        if ((uint64_t)-1 == global_region.handle) {
+            // Skip the test if the agent doesn't support fine grained memory
+            continue;
+        }
+
+        // Find a memory region that supports kernel arguments
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+
+        // Create the queue
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info:
+        // symbol_record[0] is the set kernel, symbol_record[1] the wait kernel.
+        symbol_record_t symbol_record[2];
+        memset(&symbol_record, 0, sizeof(symbol_record));
+
+        char* symbol_names[2];
+        symbol_names[0] = "&__signal_st_rlx_kernel";
+        symbol_names[1] = "&__signal_wait_eq_rlx_kernel";
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 2, symbol_names, symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // The kernarg data structure
+        typedef struct __attribute__ ((aligned(16))) signal_args_s {
+            uint32_t count;
+            hsa_signal_t* signal_handles;
+            hsa_signal_value_t* signal_values;
+        } signal_args_t;
+        signal_args_t signal_args;
+
+        // Allocate the kernel argument buffer from the correct region
+        // Assume that the size will be the same for both wait and set kernels
+        signal_args_t* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region,
+                                     symbol_record[1].kernarg_segment_size,
+                                     (void**)(&kernarg_buffer));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate and initialize space for the wait value
+        // parameter
+        hsa_signal_value_t *wait_value;
+        status = hsa_memory_allocate(global_region, sizeof(hsa_signal_value_t), (void**) &wait_value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        *wait_value = 0;
+
+        // Create the test signal
+        hsa_signal_t test_signal;
+        status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &test_signal);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Fill in the kernel argument list
+        signal_args.count = 1;
+        signal_args.signal_handles = &test_signal;
+        signal_args.signal_values = wait_value;
+        memcpy(kernarg_buffer, &signal_args, symbol_record[1].kernarg_segment_size);
+
+        // Create the set kernel completion signal
+        hsa_signal_t set_kernel_completion_signal;
+        status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &set_kernel_completion_signal);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the wait kernel completion signals
+        hsa_signal_t wait_kernel_completion_signal[NUM_WAIT_KERNELS];
+        int jj;
+        for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
+            status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &wait_kernel_completion_signal[jj]);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+        }
+
+        // Setup the dispatch packet. The barrier bit is left at 0; this one
+        // packet template is reused for the wait kernels and the set kernel.
+        hsa_kernel_dispatch_packet_t dispatch_packet;
+        memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));
+        dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+        dispatch_packet.header |= 0 << HSA_PACKET_HEADER_BARRIER;
+        dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+        dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+        dispatch_packet.setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+        dispatch_packet.workgroup_size_x = 1;
+        dispatch_packet.workgroup_size_y = 1;
+        dispatch_packet.workgroup_size_z = 1;
+        dispatch_packet.grid_size_x = 1;
+        dispatch_packet.grid_size_y = 1;
+        dispatch_packet.grid_size_z = 1;
+        dispatch_packet.kernel_object = symbol_record[1].kernel_object;
+        dispatch_packet.group_segment_size = symbol_record[1].group_segment_size;
+        dispatch_packet.private_segment_size = symbol_record[1].private_segment_size;
+        dispatch_packet.kernarg_address = (void*) kernarg_buffer;
+
+        // Dispatch the wait kernels
+        for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
+            // Set the appropriate completion signal
+            dispatch_packet.completion_signal = wait_kernel_completion_signal[jj];
+            // Dispatch the kernel
+            enqueue_dispatch_packet(queue, &dispatch_packet);
+        }
+
+        // Dispatch the set kernel
+        // Set the appropriate completion signal and code descriptor values
+        dispatch_packet.completion_signal = set_kernel_completion_signal;
+        dispatch_packet.kernel_object = symbol_record[0].kernel_object;
+        dispatch_packet.group_segment_size = symbol_record[0].group_segment_size;
+        dispatch_packet.private_segment_size = symbol_record[0].private_segment_size;
+
+        // Dispatch the kernel
+        enqueue_dispatch_packet(queue, &dispatch_packet);
+
+        // Wait on the completion signal of the set kernel
+        while (hsa_signal_wait_acquire(set_kernel_completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) {}
+
+        // Wait on the completion signals of each of the wait kernels
+        for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
+            while (hsa_signal_wait_relaxed(wait_kernel_completion_signal[jj], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) {}
+            status = hsa_signal_destroy(wait_kernel_completion_signal[jj]);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+        }
+
+        // Check that the value of the test_signal is modified
+        hsa_signal_value_t signal_value;
+        do {
+            signal_value = hsa_signal_wait_relaxed(test_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
+        } while (signal_value != 0);
+
+        // Destroy the test completion signal
+        status = hsa_signal_destroy(test_signal);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the set kernel completion signal
+        status = hsa_signal_destroy(set_kernel_completion_signal);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        status = hsa_memory_free(wait_value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/aql/test_aql_barrier_bit_set.c b/src/core/aql/test_aql_barrier_bit_set.c
new file mode 100644
index 0000000..277d045
--- /dev/null
+++ b/src/core/aql/test_aql_barrier_bit_set.c
@@ -0,0 +1,326 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * + * Test Name: aql_barrier_bit_set + * Scope: Conformance + * + * Purpose: Verifies that if a dispatch packet has the barrier bit set, all + * preceding packets must complete before processing of this packet occurs. + * + * Test Description: + * 1) Generate a list of agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) Load and initialize the signal_st_rlx and the signal_wait_rlx kernels.
+ * 3) Select an agent from the list and create a queue. + * 4) Create a test signal with an initial value of 1. + * 5) Create several completion signals for dispatches. + * 6) Enqueue several dispatch packets to the queue that have the barrier bit + * set to 0, and that all execute the signal_wait_rlx kernel on the created + * test signal and wait for it to have a value of 0. Each should have its + * own completion signal. + * 7) Enqueue a signal dispatch packet that has the barrier bit set to 1, + * and that executes the signal_st_rlx kernel on the created test signal, setting + * the value to 0. It should also have its own completion signal. + * 8) Wait on the completion signal of the dispatch packet with the barrier bit + * set, with a timeout specified. + * 9) After the timeout has elapsed, check the other completion signals and + * verify that the kernels are not yet complete. + * 10) Set the value of the test signal to 0 on the host. + * 11) Wait on the completion signal of the dispatch packet with the barrier bit + * set, this time without a timeout. + * + * Expected results: None of the wait kernels should complete until the host sets + * the signal value. The dispatch signal_st_rlx kernel should only execute after + * they have all completed.
+ * + */ + +#include +#include +#include +#include +#include + +#define NUM_WAIT_KERNELS 8 + +int test_aql_barrier_bit_set() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("signal_operations.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if a queue on this agent support HSA_QUEUE_TYPE_MULTI + hsa_queue_type_t queue_type; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_TYPE, &queue_type); + ASSERT(HSA_STATUS_SUCCESS == status); + if (HSA_QUEUE_TYPE_MULTI != queue_type) { + continue; + } + + // Find the global memory region that is fine grained + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 != global_region.handle) { + // Skip this agent if a fine grained memory region isn't available. 
+ continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record[2]; + memset(&symbol_record, 0, sizeof(symbol_record)); + + char* symbol_names[2]; + symbol_names[0] = "&__signal_st_rlx_kernel"; + symbol_names[1] = "&__signal_wait_eq_rlx_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 2, symbol_names, symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // The kernarg data structure + typedef struct __attribute__ ((aligned(16))) signal_args_s { + uint32_t count; + hsa_signal_t* signal_handles; + hsa_signal_value_t* signal_values; + } signal_args_t; + signal_args_t signal_args; + + // Allocate the kernel argument buffer from the correct region + // Assume the size is the same for both wait and set kernels + signal_args_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, + symbol_record[1].kernarg_segment_size, + (void**)(&kernarg_buffer)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the test 
signal. + hsa_signal_t test_signal; + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &test_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate and initialize space for the wait value + // parameter + hsa_signal_value_t *wait_value; + status = hsa_memory_allocate(global_region, sizeof(hsa_signal_value_t), (void**) &wait_value); + ASSERT(HSA_STATUS_SUCCESS == status); + *wait_value = 0; + + // Fill in the kernel argument list + signal_args.count = 1; + signal_args.signal_handles = &test_signal; + signal_args.signal_values = wait_value; + memcpy(kernarg_buffer, &signal_args, symbol_record[1].kernarg_segment_size); + + // Create the set kernel completion signal + hsa_signal_t set_kernel_completion_signal; + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &set_kernel_completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the wait kernel completion signals + hsa_signal_t wait_kernel_completion_signal[NUM_WAIT_KERNELS]; + int jj; + for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) { + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &wait_kernel_completion_signal[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Setup the dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 0 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 1; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.kernel_object = 
symbol_record[1].kernel_object; + dispatch_packet.group_segment_size = symbol_record[1].group_segment_size; + dispatch_packet.private_segment_size = symbol_record[1].private_segment_size; + dispatch_packet.kernarg_address = (void*) kernarg_buffer; + + // Dispatch the wait kernels + for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) { + // Set the appropriate completion signal + dispatch_packet.completion_signal = wait_kernel_completion_signal[jj]; + // Dispatch the kernel + enqueue_dispatch_packet(queue, &dispatch_packet); + } + + // Dispatch the set kernel, setting the barrier bit to 1 + dispatch_packet.header |= 1 == HSA_PACKET_HEADER_BARRIER; + + // Set the appropriate completion signal and code descriptor values + dispatch_packet.kernel_object = symbol_record[0].kernel_object; + dispatch_packet.group_segment_size = symbol_record[0].group_segment_size; + dispatch_packet.private_segment_size = symbol_record[0].private_segment_size; + + // Dispatch the set kernel + enqueue_dispatch_packet(queue, &dispatch_packet); + + // Query the systems timestamp frequency for wait timeout + uint16_t freq; + status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, (void*) &freq); + + // Wait on the completion signal of the set kernel, but + // timeout after 1 second + uint64_t wait_time = (uint64_t) freq; + hsa_signal_value_t signal_value; + signal_value = hsa_signal_wait_relaxed(set_kernel_completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE); + ASSERT(1 == signal_value); + + // Wait on the completion signals of each of the wait kernels, again timing out after 1 second + for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) { + signal_value = hsa_signal_wait_relaxed(wait_kernel_completion_signal[jj], HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE); + ASSERT(1 != signal_value); + } + + // Set the test_signal value to 0 from the host + hsa_signal_store_relaxed(test_signal, 0); + + // Wait on the completion signal of the set kernel again, but + // no 
timeout should be specified + while (hsa_signal_wait_relaxed(set_kernel_completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) {} + + // Destroy the set kernel completion signal + status = hsa_signal_destroy(set_kernel_completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Wait on the values of the wait kernel signals + // Destroy the signal at that time. + for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) { + while (hsa_signal_wait_relaxed(wait_kernel_completion_signal[jj], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) {} + status = hsa_signal_destroy(wait_kernel_completion_signal[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the test completion signal + status = hsa_signal_destroy(test_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(wait_value); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/aql/test_aql_barrier_cross_queue_dependency.c b/src/core/aql/test_aql_barrier_cross_queue_dependency.c new file mode 100644 index 0000000..53d6999 --- /dev/null +++ b/src/core/aql/test_aql_barrier_cross_queue_dependency.c @@ -0,0 +1,184 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The 
University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: aql_barrier_cross_queue_dependency + * Scope: Conformance + * + * Purpose: Verifies that that barrier packets can utilize + * dependency signals across dispatch queues. 
+ * + * Test Description: + * 1) Generate a list of agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) If the agent list has less than two agents the test cannot be run. Pass + * the test. + * 3) Create a queue for each agent in the list. + * 4) If n is the number of agents, create n+1 signals, initialized to 1. + * 5) For each agent, identified with an index i, enqueue a barrier packet + * using signal i as the dependency signal and i+1 as the completion signal. + * The final signal will not be a dependency signal but will be the final + * test completion signal. + * 6) Check the final signal's value and make sure it hasn't been modified before + * the first signal's value is set. + * 7) Set the value of the first signal to 0. + * 8) Wait on the final signal until its value is 0. + * 9) Check all of the intermediate signals and verify that they have a signal + * value of 0. + * + * Expected results: Modifying the first signal's value to 0 should propagate the + * change to all barrier packet values. 
+ * + */ + +#include +#include +#include + +int test_aql_barrier_cross_queue_dependency() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Collect all the dispatch agents + int num_dispatch_agents = 0; + hsa_agent_t dispatch_agents[agent_list.num_agents]; + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + dispatch_agents[num_dispatch_agents] = agent_list.agents[ii]; + ++num_dispatch_agents; + } + + // Skip this test case if the number of dispatch agents is less than 2 + if (num_dispatch_agents < 2) { + // Number of dispatch agents is less than 2, skip Cross Dependency test + free_agent_list(&agent_list); + return 0; + } + + // Create signals + hsa_signal_t signals[num_dispatch_agents + 1]; + for (ii = 0; ii < num_dispatch_agents + 1; ++ii) { + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &signals[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Create a queue on each dispatch agent, enqueue a barrier on this queue + hsa_queue_t* queues[num_dispatch_agents]; + for (ii = 0; ii < num_dispatch_agents; ++ii) { + // Create a queue + status = hsa_queue_create(dispatch_agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &(queues[ii])); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Enqueue a barrier + const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + uint64_t packet_id = hsa_queue_add_write_index_relaxed(queues[ii], 1); + hsa_barrier_and_packet_t* barrier_packet = (hsa_barrier_and_packet_t*)(queues[ii]->base_address + packet_id * packet_size); + memset(barrier_packet, 0, packet_size); + // 
Set the dependency signal + barrier_packet->dep_signal[0] = signals[ii]; + // Set the completion signal + barrier_packet->completion_signal = signals[ii + 1]; + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; + + __atomic_store_n((uint16_t*)(&barrier_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the barrier packet + hsa_signal_store_release(queues[ii]->doorbell_signal, packet_id); + } + + // Verify none of the signals has been changed + for (ii = 0; ii < num_dispatch_agents+1; ++ii) { + int signal_value = (int)hsa_signal_load_relaxed(signals[ii]); + ASSERT(1 == signal_value); + } + + // Trigger the barrier in the 1st queue + hsa_signal_store_release(signals[0], 0); + + // Wait for all barriers to complete + while (0 != hsa_signal_wait_relaxed(signals[num_dispatch_agents], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)); + + // Verify all signals have been updated + for (ii = 0; ii < num_dispatch_agents+1; ++ii) { + int signal_value = (int)hsa_signal_load_relaxed(signals[ii]); + ASSERT(0 == signal_value); + } + + // Destroy all signals + for (ii = 0; ii < num_dispatch_agents+1; ++ii) { + status = hsa_signal_destroy(signals[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy all queues + for (ii = 0; ii < num_dispatch_agents; ++ii) { + status = hsa_queue_destroy(queues[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/aql/test_aql_barrier_cross_queue_dependency_negative_value.c b/src/core/aql/test_aql_barrier_cross_queue_dependency_negative_value.c new file mode 100644 index 0000000..ff6af74 --- /dev/null +++ b/src/core/aql/test_aql_barrier_cross_queue_dependency_negative_value.c @@ -0,0 +1,185 @@ +/* + * 
============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: aql_barrier_cross_queue_dependency_negative_value + * Scope: Conformance + * + * Purpose: Verifies that barrier packets can utilize + * dependency signals across dispatch queues, and that an + * error condition is properly propagated across queues. + * + * Test Description: + * 1) Generate a list of agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) If the agent list has fewer than two agents the test cannot be run. Pass + * the test. + * 3) Create a queue for each agent in the list. + * 4) If n is the number of agents, create n+1 signals, initialized to 1. + * 5) For each agent, identified with an index i, enqueue a barrier packet + * using signal i as the dependency signal and i+1 as the completion signal. + * The final signal will not be a dependency signal but will be the final + * test completion signal. + * 6) Check the final signal's value and make sure it hasn't been modified before + * the first signal's value is set. + * 7) Set the value of the first signal to -1. + * 8) Wait on the final signal until its value is less than 0. + * 9) Check all of the intermediate signals and verify that they have a signal + * value less than 0. + * + * Expected results: Modifying the first signal's value to -1 should propagate the + * error to all barrier packet values.
+ * + */ + +#include +#include +#include + +int test_aql_barrier_cross_queue_dependency_negative_value() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Collect all the dispatch agents + int num_dispatch_agents = 0; + hsa_agent_t dispatch_agents[agent_list.num_agents]; + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + dispatch_agents[num_dispatch_agents] = agent_list.agents[ii]; + ++num_dispatch_agents; + } + + // Skip this test case if the number of dispatch agents is less than 2 + if (num_dispatch_agents < 2) { + // Number of dispatch agents is less than 2, skip Cross Dependency Negative Value test + free_agent_list(&agent_list); + return 0; + } + + // Create signals + hsa_signal_t signals[num_dispatch_agents + 1]; + for (ii = 0; ii < num_dispatch_agents+1; ++ii) { + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &signals[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Create a queue on each dispatch agent, enqueue a barrier on this queue + hsa_queue_t* queues[num_dispatch_agents]; + for (ii = 0; ii < num_dispatch_agents; ++ii) { + // Create a queue + status = hsa_queue_create(dispatch_agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queues[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Enqueue a barrier + const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + uint64_t packet_id = hsa_queue_add_write_index_relaxed(queues[ii], 1); + hsa_barrier_and_packet_t* barrier_packet = (hsa_barrier_and_packet_t*)(queues[ii]->base_address + packet_id * packet_size); + 
memset(barrier_packet, 0, packet_size); + // Set the dependency signal + barrier_packet->dep_signal[0] = signals[ii]; + // Set the completion signal + barrier_packet->completion_signal = signals[ii + 1]; + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; + + __atomic_store_n((uint16_t*)(&barrier_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the barrier packet + hsa_signal_store_release(queues[ii]->doorbell_signal, packet_id); + } + + // Verify none of the signals has been changed + for (ii = 0; ii < num_dispatch_agents+1; ++ii) { + int signal_value = (int)hsa_signal_load_relaxed(signals[ii]); + ASSERT(1 == signal_value); + } + + // Trigger the barrier in the 1st queue + hsa_signal_store_release(signals[0], -1); + + // Wait for all barriers to complete + while (0 != hsa_signal_wait_relaxed(signals[num_dispatch_agents], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)); + + // All signals should have a negative value + for (ii = 0; ii < num_dispatch_agents+1; ++ii) { + int signal_value = (int)hsa_signal_load_relaxed(signals[ii]); + ASSERT(signal_value < 0); + } + + // Destroy all signals + for (ii = 0; ii < num_dispatch_agents+1; ++ii) { + status = hsa_signal_destroy(signals[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy all queues + for (ii = 0; ii < num_dispatch_agents; ++ii) { + status = hsa_queue_destroy(queues[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/aql/test_aql_barrier_multiple_barriers.c b/src/core/aql/test_aql_barrier_multiple_barriers.c new file mode 100644 index 0000000..8c25c4d --- /dev/null +++ b/src/core/aql/test_aql_barrier_multiple_barriers.c @@ -0,0 +1,266 @@ +/* + * 
============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: aql_barrier_multiple_barriers + * Scope: Conformance + * + * Purpose: Verifies that several barrier packets and dispatch + * packets can be enqueued and that the barrier packets + * halt execution as expected. + * + * Test Description: + * 1) Generate a list of agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) Select an agent from the list and create a queue. + * 3) Load and initialize the no_op kernel. + * 4) Create a dependency signal and a completion signal, with initial + * values of 1, and enqueue a barrier packet to the queue using these + * signals appropriately. + * 5) Create a kernel completion signal and enqueue a dispatch packet + * and the no_op kernel. + * 6) Repeat steps 4 and 5 until the queue is nearly full. + * 7) Check all of the completion signals, for both barrier packets + * and dispatch packets, and ensure that they are 1. + * 8) Iterate through the barrier packets, in order, doing the following: + * a) Set the barrier packet's dependency signal's value to 0. + * b) Wait until the barrier packet's completion signal is 0. + * c) Wait until the following dispatch packet's completion signal is 0. + * d) Check the following barrier packet's completion signal's value, and + * verify it is still 1. + * + * Expected results: Each barrier packet should impose a barrier, halting + * execution of the packets on the queue until the barrier's dependency + * signal is decremented to 0. 
+ * + */ + +#include +#include +#include +#include + +int test_aql_barrier_multiple_barriers() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Must be power of 2 minus 1 + const int repeat_count = 7; + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 2 * (repeat_count + 1), HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__no_op_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create six signals + int kk; + hsa_signal_t dependency_signals[repeat_count]; + 
hsa_signal_t barrier_completion_signals[repeat_count]; + hsa_signal_t kernel_completion_signals[repeat_count]; + for (kk = 0; kk < repeat_count; ++kk) { + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &dependency_signals[kk]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &barrier_completion_signals[kk]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &kernel_completion_signals[kk]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + + // Repeat to enqueue the barrier and the dispatch kernel + for (kk = 0; kk < repeat_count; ++kk) { + // Create and enqueue a barrier packet + uint64_t packet_id = hsa_queue_add_write_index_relaxed(queue, 1); + hsa_barrier_and_packet_t* barrier_packet = (hsa_barrier_and_packet_t*)(queue->base_address + packet_id * packet_size); + memset(barrier_packet, 0, packet_size); + barrier_packet->dep_signal[0] = dependency_signals[kk]; + barrier_packet->completion_signal = barrier_completion_signals[kk]; + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; + + __atomic_store_n((uint16_t*)(&barrier_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the barrier packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + + // Create and enqueue a dispatch kernel + packet_id = hsa_queue_add_write_index_relaxed(queue, 1); + hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)(queue->base_address + packet_id * packet_size); + memset(dispatch_packet, 0, packet_size); + dispatch_packet->completion_signal = kernel_completion_signals[kk]; + dispatch_packet->setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = 256; + dispatch_packet->workgroup_size_y 
= 1; + dispatch_packet->workgroup_size_z = 1; + dispatch_packet->grid_size_x = 256; + dispatch_packet->grid_size_y = 1; + dispatch_packet->grid_size_z = 1; + dispatch_packet->kernel_object = symbol_record.kernel_object; + + header = 0; + header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + + __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + } + + // Verify that all completion signals have not been changed + for (kk = 0; kk < repeat_count; ++kk) { + int signal_value; + signal_value = (int)hsa_signal_load_relaxed(barrier_completion_signals[kk]); + ASSERT(1 == signal_value); + signal_value = (int)hsa_signal_load_relaxed(kernel_completion_signals[kk]); + ASSERT(1 == signal_value); + } + + // Set the dependency signal of each barrier packet + for (kk = 0; kk < repeat_count; ++kk) { + // Set the barrier's dependency signal + hsa_signal_store_release(dependency_signals[kk], 0); + + // Wait for the barrier to complete + while (0 != hsa_signal_wait_relaxed(barrier_completion_signals[kk], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)); + + // Wait for the kernel to complete + while (0 != hsa_signal_wait_relaxed(kernel_completion_signals[kk], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)); + + // Verify the following barrier is still in effect + if (kk + 1 < repeat_count) { + int signal_value; + signal_value = (int)hsa_signal_load_relaxed(barrier_completion_signals[kk + 1]); + ASSERT(1 == signal_value); + } + } + + // Destroy all signals + for (kk = 0; kk < repeat_count; ++kk) { + status = hsa_signal_destroy(dependency_signals[kk]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = 
hsa_signal_destroy(barrier_completion_signals[kk]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_signal_destroy(kernel_completion_signals[kk]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/aql/test_aql_barrier_negative_value.c b/src/core/aql/test_aql_barrier_negative_value.c new file mode 100644 index 0000000..b4c7a50 --- /dev/null +++ b/src/core/aql/test_aql_barrier_negative_value.c @@ -0,0 +1,172 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: aql_barrier_negative_value + * Scope: Conformance + * + * Purpose: Verifies that a Barrier packet allows an + * application to specify up to five signal dependencies + * and, if any of the dependent signal values become negative, + * the packet processor assigns an error value to the completion + * signal. + * + * Test Description: + * 1) Generate a list of agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. 
+ * 2) Select an agent from the list and create a queue. + * 3) Create 6 signals associated with the agent (or all agents), and + * initialize all signals to have a value of 1. + * 4) Enqueue a barrier packet to the queue and use 5 of the signals + * as dependencies. + * 5) The final signal should be used as the barrier packets completion signal. + * 6) Set one of the barrier's dependant signal values to -1. + * 7) Wait for the barrier packet's completion signal's value to become + * negative, indicating that the packet processor has assigned a correct + * error value. + * 8) Repeat the test using a different signal in the dep_signal array. All + * indexes should be checked. + * + * Expected results: When any of the dependency signal's value becomes + * negative, the completion signal of the barrier packet should indicate + * that an error occured. + * + */ + +#include +#include +#include + +int test_aql_barrier_negative_value() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + int jj; + // Create six signals + hsa_signal_t signals[6]; + for (jj = 0; jj < 6; ++jj) { + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + + // Repeat on setting the kk-th signal to execute the barrier + int kk; + for (kk = 0; kk < 5; ++kk) { + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, 
HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create and enqueue a barrier packet + uint64_t packet_id = hsa_queue_add_write_index_relaxed(queue, 1); + hsa_barrier_and_packet_t* barrier_packet = (hsa_barrier_and_packet_t*)(queue->base_address + packet_id * packet_size); + memset(barrier_packet, 0, packet_size); + barrier_packet->completion_signal = signals[5]; + + // Add dependency signals to the barrier packet + for (jj = 0; jj < 5; ++jj) { + barrier_packet->dep_signal[jj] = signals[jj]; + } + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; + __atomic_store_n((uint16_t*)(&barrier_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the barrier packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + + // Assign a negative value to the kk-th signal + hsa_signal_store_release(signals[kk], -1); + + // The barrier packet's completion signal should be set to a + // negative value by the runtime. 
+ // Wait for the negative value on the 5th signal + hsa_signal_wait_relaxed(signals[5], HSA_SIGNAL_CONDITION_LT, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Reinitialize the signals for next iteration + // Decrement signal 5 + hsa_signal_store_release(signals[5], 1); + // Decrement signal kk + hsa_signal_store_release(signals[kk], 1); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy all signals + for (jj = 0; jj < 6; ++jj) { + status = hsa_signal_destroy(signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/aql/test_aql_barrier_or.c b/src/core/aql/test_aql_barrier_or.c new file mode 100644 index 0000000..5d77775 --- /dev/null +++ b/src/core/aql/test_aql_barrier_or.c @@ -0,0 +1,247 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: aql_barrier_OR + * Scope: Conformance + * + * Purpose: Verifies that a Barrier-OR packet allows an + * application to specify up to five signal dependencies + * and requires the packet processor to resolve one of those + * dependencies before it can proceed. + * + * Test Description: + * 1) Generate a list of agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) Load and initialize the no-op kernel. 
+ * 3) Select an agent from the list and create a queue. + * 4) Create 6 signals associated with the agent (or all agents), and + * initialize all signals to have a value of 1. + * 5) Enqueue a barrier packet to the queue and use 5 of the signals + * as dependencies. + * 6) Enqueue a dispatch packet that executes the no-op kernel and uses + * the final signal as the completion signal. + * 7) Check the completion signal of the no-op kernel, and verify that + * it has not decremented. + * 8) Set one of the barrier's dependant signal values to 0. + * 9) Wait until the kernels completion signal has decremented. + * 10) Repeat steps 5 to 9 with reinitialized signals, and decrement + * a different signal index for the barrier packet. + * + * Expected results: The kernels completion signal should not decrement until + * one of the barrier's dependant signals have decremented. All of the barrier + * packets dependency slots should be verified. + * + */ + +#include +#include +#include +#include +#include + +int test_aql_barrier_or() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_code_object_t code_object; + 
hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__no_op_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + int jj; + // Create six signals + hsa_signal_t signals[6]; + for (jj = 0; jj < 6; ++jj) { + status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + + // Repeat on setting the kk-th signal to execute the barrier + int kk; + for (kk = 0; kk < 5; ++kk) { + // Create and enqueue a barrier packet + uint64_t packet_id = hsa_queue_add_write_index_relaxed(queue, 1); + hsa_barrier_or_packet_t* barrier_packet = (hsa_barrier_or_packet_t*)(queue->base_address + packet_id * packet_size); + memset(barrier_packet, 0, packet_size); + + // Add dependency signals to the barrier packet + for (jj = 0; jj < 5; ++jj) { + barrier_packet->dep_signal[jj] = signals[jj]; + } + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_BARRIER_OR << HSA_PACKET_HEADER_TYPE; + + __atomic_store_n((uint16_t*)(&barrier_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the barrier packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + + // Create and enqueue a dispatch kernel + 
packet_id = hsa_queue_add_write_index_relaxed(queue, 1); + hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)(queue->base_address + packet_id * packet_size); + memset(dispatch_packet, 0, packet_size); + dispatch_packet->completion_signal = signals[5]; + dispatch_packet->setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = 256; + dispatch_packet->workgroup_size_y = 1; + dispatch_packet->workgroup_size_z = 1; + dispatch_packet->grid_size_x = 256; + dispatch_packet->grid_size_y = 1; + dispatch_packet->grid_size_z = 1; + dispatch_packet->kernel_object = symbol_record.kernel_object; + + header = 0; + header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + + // Verify the kernel has not been launched + uint32_t time_now = time(NULL); + if (0 == hsa_signal_wait_relaxed(signals[5], HSA_SIGNAL_CONDITION_EQ, 0, time_now + 1, HSA_WAIT_STATE_BLOCKED)) { + // signals[5] should not be decremented + ASSERT(0); + } + + // Decrement only the kk-th signal, the barrier packet should be executed + hsa_signal_store_release(signals[kk], 0); + // Wait for the no_op kernel to finish + while (0 != hsa_signal_wait_relaxed(signals[5], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)); + + // Reinitialize the signals for next iteration + // Decrement signal 5 + hsa_signal_store_release(signals[5], 1); + // Decrement signal kk + hsa_signal_store_release(signals[kk], 1); + } + + // Destroy all signals + for (jj = 0; jj < 6; ++jj) { + status = hsa_signal_destroy(signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the queue + status 
= hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/aql/test_aql_group_memory.c b/src/core/aql/test_aql_group_memory.c new file mode 100644 index 0000000..163b972 --- /dev/null +++ b/src/core/aql/test_aql_group_memory.c @@ -0,0 +1,225 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: aql_group_memory
+ * Scope: Conformance
+ *
+ * Purpose: Verifies that a kernel that uses group memory can be dispatched properly.
+ *
+ * Test Description:
+ * 1) Obtain the list of all agents that support the HSA_QUEUE_FEATURE_DISPATCH
+ *    queue feature.
+ * 2) For an agent create a queue.
+ * 3) Query the agent's memory regions and ensure that a region exists that is a
+ *    HSA_SEGMENT_GROUP segment type.
+ * 4) Load and initialize the group_memory kernel.
+ * 5) Create a completion signal for dispatch.
+ * 6) Enqueue a dispatch packet, specifying the correct group_segment_size in the
+ *    AQL packet obtained from the hsa_ext_code_descriptor_t structure.
+ * 7) Wait for the kernel to finish executing and verify that it executed correctly.
+ *
+ * Expected Results: The kernel should execute correctly.
+ * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_aql_group_memory() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("group_memory.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if a group memory region is available from this agent + hsa_region_t group_region; + group_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_group_memory_region, &group_region); + if ((uint64_t)-1 == group_region.handle) { + // If group memory isn't available, skip this agent + continue; + } + + // Find a memory region in the global segment that is fine grained + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 != global_region.handle) { + // If fine grained memory isn't available, skip this agent + continue; + } + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = finalize_executable(agent_list.agents[ii], + 1, + 
&module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__group_memory_static_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the data block to be used by the kernel + // The size of the data block must be able to fit into one workgroup + uint32_t block_size = 128; + uint32_t* data_in; + uint32_t* data_out; + status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_in); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_out); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_memory_t), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the data + memset(data_out, 0, block_size * sizeof(uint32_t)); + int kk; + for (kk = 0; kk < block_size; ++kk) { + data_in[kk] = kk; + } + + launch_memory_kernel(queue, data_in, data_out, block_size, + 0, symbol_record.group_segment_size, + symbol_record.kernel_object, + kernarg_buffer); + + // Verify the kernel was executed correctly + ASSERT(HSA_STATUS_SUCCESS == status); + for (kk = 0; kk < block_size; ++kk) { + 
if (data_in[kk] != data_out[kk]) { + ASSERT(0); + } + } + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the data buffers + status = hsa_memory_free(data_in); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(data_out); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/aql/test_aql_group_memory_overspecified.c b/src/core/aql/test_aql_group_memory_overspecified.c new file mode 100644 index 0000000..d097d6b --- /dev/null +++ b/src/core/aql/test_aql_group_memory_overspecified.c @@ -0,0 +1,226 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: aql_group_memory_overspecified + * Scope: Conformance + * + * Purpose: Verifies that a kernel that uses group memory can be dispatched properly, + * even if the value specified in the aql packet's group_segment_size field is + * greater than the value given in the hsa_ext_code_descriptor_t structure. + * + * Test Description: + * 1) Obtain the list of all agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. 
+ * 2) For an agent create a queue.
+ * 3) Query the agent's memory regions and ensure that a region exists that is a
+ *    HSA_SEGMENT_GROUP segment type.
+ * 4) Load and initialize the group_memory kernel.
+ * 5) Create a completion signal for dispatch.
+ * 6) Enqueue a dispatch packet, specifying a group_segment_size in the AQL
+ *    packet that is twice the size given in the hsa_ext_code_descriptor_t structure.
+ * 7) Wait for the kernel to finish executing and verify that it executed correctly.
+ *
+ * Expected Results: The kernel should execute correctly.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * Dispatches the "&__group_memory_static_kernel" kernel from group_memory.brig
+ * on every agent that reports HSA_AGENT_FEATURE_KERNEL_DISPATCH, passing twice
+ * the group segment size recorded for the kernel symbol, and then compares
+ * data_out with data_in element by element.
+ *
+ * Agents without a group region or without a fine grained global region are
+ * skipped. All checks are made with ASSERT; the function returns 0 once every
+ * agent has been processed.
+ */
+int test_aql_group_memory_overspecified() {
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Read in the BRIG module that contains the kernel
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("group_memory.brig", &module));
+
+    // Collect the agents to test
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Number of elements in (and byte size of) the input and output blocks
+    const uint32_t block_size = 128;
+    const size_t block_bytes = block_size * sizeof(uint32_t);
+
+    // Run the same scenario once per agent
+    int agent_idx;
+    for (agent_idx = 0; agent_idx < agent_list.num_agents; ++agent_idx) {
+        const hsa_agent_t agent = agent_list.agents[agent_idx];
+
+        // Only agents that support kernel dispatch are exercised
+        uint32_t agent_features = 0;
+        status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &agent_features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (!(agent_features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Skip the agent if group memory isn't available
+        hsa_region_t group_region;
+        group_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent, get_group_memory_region, &group_region);
+        if (group_region.handle == (uint64_t)-1) {
+            continue;
+        }
+
+        // Skip the agent if global fine grained memory isn't available
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent, get_global_memory_region_fine_grained, &global_region);
+        if (global_region.handle == (uint64_t)-1) {
+            continue;
+        }
+
+        // A region that supports kernel arguments is required
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent, get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+
+        // Queue that receives the dispatch
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL,
+                                  UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable with zeroed control directives
+        hsa_ext_control_directives_t directives;
+        memset(&directives, 0, sizeof(directives));
+
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+        status = finalize_executable(agent,
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     directives,
+                                     &code_object,
+                                     &executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Look up the kernel symbol and its recorded attributes
+        symbol_record_t kernel_symbol;
+        memset(&kernel_symbol, 0, sizeof(kernel_symbol));
+
+        char* symbol_names[1] = { "&__group_memory_static_kernel" };
+        status = get_executable_symbols(executable, agent, 0, 1, symbol_names, &kernel_symbol);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Input and output blocks come from the global region
+        uint32_t* data_in;
+        uint32_t* data_out;
+        status = hsa_memory_allocate(global_region, block_bytes, (void**) &data_in);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, block_bytes, (void**) &data_out);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // The kernel argument buffer comes from the kernarg region
+        void* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_memory_t), &kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // data_in[i] = i; data_out starts out cleared
+        int elem;
+        for (elem = 0; elem < block_size; ++elem) {
+            data_in[elem] = elem;
+        }
+        memset(data_out, 0, block_bytes);
+
+        // Dispatch, asking for double the group segment size of the symbol
+        launch_memory_kernel(queue, data_in, data_out, block_size,
+                             0,
+                             2 * kernel_symbol.group_segment_size,
+                             kernel_symbol.kernel_object,
+                             kernarg_buffer);
+
+        // Every output element has to match its input element
+        for (elem = 0; elem < block_size; ++elem) {
+            if (data_in[elem] == data_out[elem]) {
+                continue;
+            }
+            ASSERT(0);
+        }
+
+        // Release the kernarg buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Release the data blocks
+        status = hsa_memory_free(data_in);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(data_out);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Tear down the queue, then the executable, then the code object
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/aql/test_aql_launch_size.c b/src/core/aql/test_aql_launch_size.c
new file mode 100644
index 0000000..027a774
--- /dev/null
+++ b/src/core/aql/test_aql_launch_size.c
@@ -0,0 +1,379 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: aql_launch_size + * Scope: Conformance + * + * Purpose: Verifies that an agent supports all work-item values that are + * reported in the HSA_AGENT_INFO_WORKGROUP_MAX_DIM and HSA_AGENT_INFO_GRID_MAX_DIM + * agent attributes. 
The total number of work-items must be limited to values + * specified in the HSA_AGENT_INFO_WORKGROUP_MAX_SIZE and HSA_AGENT_INFO_GRID_MAX_SIZE + * values. + * + * Test Description: + * 1) Obtain the list of all agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) For each agent create a queue. + * 3) Load and initialize the init_data kernel. + * 4) Allocate a memory region that is large enough to represent a maximal execution + * of the 3 dimensional init_data kernel, i.e. it should represent a 3 dimensional + * region that is min(grid_max_dim[0] x grid_max_dim[1] x grid_max_dim[2] | grid_max_size) + * in size. + * 5) Execute several kernels, iterating through the possible values of: + * a) dimensions - 1 to 3 + * b) Work group size per dimension x - 1 to wg_max_dim[x] in increments of 1 + * c) Work group size per dimension y - 0 to wg_max_dim[y] in increments of 1 + * d) Work group size per dimension z - 0 to wg_max_dim[z] in increments of 1 + * e) Grid size per dimension x - 1 to grid_max_size[x] in increments of 1 + * f) Grid size per dimension y - 0 to grid_max_size[y] in increments of 1 + * g) Grid size per dimension z - 0 to grid_max_size[z] in increments of 1 + * h) Use a row_pitch equal to the grid_size[x] value + * i) Use a slice_pitch equal to the grid_size[x] * grid_size[y] value + * 6) Verify after each execution that only the specified part of the memory region was + * modified. + * 7) Reinitialize the memory region for each execution. 
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * Runs one dispatch through launch_kernel() and then validates the buffer.
+ *
+ * queue, data, total_size, value, dim, grid_dim, workgroup_dim,
+ * kernel_obj_address and kernarg_address are forwarded unchanged to
+ * launch_kernel(). After it returns:
+ *   - data[0 .. data_size - 1] must all equal value;
+ *   - data[data_size .. total_size - 1] must all be 0.
+ * Any mismatch triggers ASSERT(0).
+ *
+ * NOTE(review): nothing in this file zeroes `data`, so the second check
+ * presumably relies on launch_kernel() resetting the buffer before each
+ * dispatch -- confirm against the helper's implementation.
+ */
+void launch_kernel_grid_size_workgroup_size(
+        hsa_queue_t* queue,
+        uint32_t* data,
+        uint32_t data_size,
+        uint32_t total_size,
+        uint32_t value,
+        int dim,
+        hsa_dim3_t grid_dim,
+        hsa_dim3_t workgroup_dim,
+        uint64_t kernel_obj_address,
+        void* kernarg_address) {
+    // Launch the kernel
+    launch_kernel(queue, data, total_size, value, dim, grid_dim, workgroup_dim, kernel_obj_address, kernarg_address);
+
+    // Verify the data[0 --> data_size -1] has been updated correctly
+    // NOTE(review): ii is a signed int compared against uint32_t bounds.
+    int ii;
+    for (ii = 0; ii < data_size; ++ii) {
+        if (data[ii] != value) {
+            ASSERT(0);
+        }
+    }
+
+    // Verify the rest of the data, data[data_size --> total_size - 1], has not been touched
+    // ii is intentionally not reset: this loop resumes where the previous one stopped.
+    for (; ii < total_size; ++ii) {
+        if (data[ii] != 0) {
+            ASSERT(0);
+        }
+    }
+}
+
+/*
+ * Dispatches the "&__init_int_data_kernel" kernel from init_data.brig on every
+ * agent that reports HSA_AGENT_FEATURE_KERNEL_DISPATCH, sweeping 1D, 2D and 3D
+ * grid sizes with several work-group sizes, and validates the data buffer after
+ * each dispatch through launch_kernel_grid_size_workgroup_size().
+ *
+ * Agents without a fine grained global region are skipped. All checks are made
+ * with ASSERT; the function returns 0 once every agent has been processed.
+ */
+int test_aql_launch_size() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("init_data.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Check if the queue supports dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            // Continue if this agent does not support DISPATCH
+            continue;
+        }
+
+        // Find a memory region in the global segment
+        hsa_region_t global_region;
+        global_region.handle=(uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        if ((uint64_t)-1 == global_region.handle) {
+            // Skip the test if global fine grained memory isn't available
+            continue;
+        }
+
+        // Find a memory region that supports kernel arguments
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+
+        // Query the agent's grid and work-group limits. Only the success of
+        // these queries is checked; the values are replaced further below.
+        hsa_dim3_t grid_max_dim;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        uint32_t grid_max_size;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_GRID_MAX_SIZE, &grid_max_size);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        uint16_t workgroup_max_dim[3];
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_WORKGROUP_MAX_DIM, workgroup_max_dim);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        uint32_t workgroup_max_size;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &workgroup_max_size);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Overwrite the grid_max_dim, grid_max_size, workgroup_max_dim, workgroup_max_size
+        // The sweep below therefore uses these fixed limits rather than the
+        // values the agent reported. grid_max_dim and workgroup_max_dim are
+        // not read again after this point.
+        const uint32_t n = 1024;
+        grid_max_dim.x = n;
+        grid_max_dim.y = n;
+        grid_max_dim.z = 4;
+        grid_max_size = n * n * 4;
+        workgroup_max_dim[0] = 256;
+        workgroup_max_dim[1] = 256;
+        workgroup_max_dim[2] = 256;
+        workgroup_max_size = 256;
+
+        // Allocate the data buffer
+        // One uint32_t per work-item of the largest grid: 1024 * 1024 * 4 elements.
+        uint32_t total_size = grid_max_size;
+        uint32_t *data;
+        status = hsa_memory_allocate(global_region, total_size * sizeof(uint32_t), (void**) &data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the queues
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        char* symbol_names[1];
+        symbol_names[0] = "&__init_int_data_kernel";
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the kernel argument buffer from the correct region
+        void* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_t), &kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        hsa_dim3_t grid_dim;
+        hsa_dim3_t workgroup_dim;
+        // value is bumped after every dispatch, so each launch writes a
+        // number that no earlier launch has written.
+        int value = 1;
+        int dim;
+
+        // Inclusive bounds and step of the grid-size sweep in each dimension
+        uint32_t X_Min;
+        uint32_t X_Max;
+        uint32_t X_Step;
+        uint32_t Y_Min;
+        uint32_t Y_Max;
+        uint32_t Y_Step;
+        uint32_t Z_Min;
+        uint32_t Z_Max;
+        uint32_t Z_Step;
+
+        // Launch kernels to work on 1D data
+        // test every size of grid_dim_x
+        X_Min = 1;
+        X_Max = 1024;
+        X_Step = 1;
+        // uint32_t X = grid_max_dim.x;
+        dim = 1;
+        grid_dim.y = 1;
+        grid_dim.z = 1;
+        workgroup_dim.y = 1;
+        workgroup_dim.z = 1;
+        for (grid_dim.x = X_Min; grid_dim.x <= X_Max; grid_dim.x += X_Step) {
+            uint32_t data_size = grid_dim.x * grid_dim.y * grid_dim.z;
+            // workgroup_dim.x doubles from 1 while it stays below
+            // workgroup_max_size (256 here), i.e. 1, 2, 4, ..., 128.
+            workgroup_dim.x = 1;
+            while (workgroup_dim.x < workgroup_max_size) {
+                // The (int) cast has no lasting effect: the parameter is uint32_t.
+                launch_kernel_grid_size_workgroup_size(
+                    queue,
+                    data,
+                    data_size,
+                    (int)total_size,
+                    value, dim,
+                    grid_dim, workgroup_dim,
+                    symbol_record.kernel_object, kernarg_buffer);
+                ++value;
+                workgroup_dim.x *= 2;
+            }
+        }
+
+        // Launch kernels to work on 2D data
+        X_Min = 960;
+        X_Max = 1024;
+        X_Step = 1;
+        Y_Min = 1022;
+        Y_Max = 1024;
+        Y_Step = 1;
+        dim = 2;
+        grid_dim.z = 1;
+        workgroup_dim.z = 1;
+        for (grid_dim.y = Y_Min; grid_dim.y <= Y_Max; grid_dim.y += Y_Step) {
+            for (grid_dim.x = X_Min; grid_dim.x <= X_Max; grid_dim.x += X_Step) {
+                workgroup_dim.x = 1;
+                workgroup_dim.y = 1;
+                uint32_t data_size = grid_dim.x * grid_dim.y * grid_dim.z;
+                // NOTE(review): workgroup_dim.y is reset once per grid size,
+                // not once per workgroup_dim.x step. After the first pass of
+                // the inner loop it is already >= workgroup_max_size, so
+                // dispatches happen only for workgroup_dim.x == 1 (with
+                // y = 1, 4, 16, 64); the later outer iterations launch nothing.
+                // Resetting y inside the outer loop would also allow x * y to
+                // exceed workgroup_max_size -- confirm the intended coverage.
+                while (workgroup_dim.x < workgroup_max_size) {
+                    while (workgroup_dim.y < workgroup_max_size) {
+                        launch_kernel_grid_size_workgroup_size(
+                            queue,
+                            data,
+                            data_size,
+                            (int)total_size,
+                            value, dim,
+                            grid_dim, workgroup_dim,
+                            symbol_record.kernel_object, kernarg_buffer);
+                        ++value;
+                        workgroup_dim.y *= 4;
+                    }
+                    workgroup_dim.x *= 4;
+                }
+            }
+        }
+
+        // Launch kernels to work on 3D data
+        X_Min = 960;
+        X_Max = 1024;
+        X_Step = 1;
+        Y_Min = 1023;  // 960;
+        Y_Max = 1024;
+        Y_Step = 1;
+        Z_Min = 2;
+        Z_Max = 4;
+        Z_Step = 1;
+        dim = 3;
+        for (grid_dim.z = Z_Min; grid_dim.z <= Z_Max; grid_dim.z += Z_Step) {
+            for (grid_dim.y = Y_Min; grid_dim.y <= Y_Max; grid_dim.y += Y_Step) {
+                for (grid_dim.x = X_Min; grid_dim.x <= X_Max; grid_dim.x += X_Step) {
+                    // The workgroup's y and z dimensions are larger than the data,
+                    // this should reduce the efficiency of the kernel, but would
+                    // show how the API handles these cases.
+                    workgroup_dim.x = 1;
+                    workgroup_dim.y = 1;
+                    workgroup_dim.z = 1;
+                    uint32_t data_size = grid_dim.x * grid_dim.y * grid_dim.z;
+                    // NOTE(review): as in the 2D sweep, workgroup_dim.y and
+                    // workgroup_dim.z are not reset inside the outer loops, so
+                    // dispatches happen only for x == 1, y == 1 and
+                    // z = 1, 8, 64 -- confirm the intended coverage.
+                    while (workgroup_dim.x < workgroup_max_size) {
+                        while (workgroup_dim.y < workgroup_max_size) {
+                            while (workgroup_dim.z < workgroup_max_size) {
+                                launch_kernel_grid_size_workgroup_size(
+                                    queue,
+                                    data,
+                                    data_size,
+                                    (int)total_size,
+                                    value, dim,
+                                    grid_dim, workgroup_dim,
+                                    symbol_record.kernel_object, kernarg_buffer);
+                                ++value;
+                                workgroup_dim.z *= 8;
+                            }
+                            workgroup_dim.y *= 8;
+                        }
+                        workgroup_dim.x *= 8;
+                    }
+                }
+            }
+        }
+
+        // Free the kernarg_buffer that was allocated on kernarg_region
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy queues
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the data buffers
+        status = hsa_memory_free(data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/aql/test_aql_private_memory.c b/src/core/aql/test_aql_private_memory.c
new file mode 100644
index 0000000..c5f0b0e
--- /dev/null
+++ b/src/core/aql/test_aql_private_memory.c
@@ -0,0 +1,216 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: aql_private_memory + * Scope: Conformance + * + * Purpose: Verifies that a kernel that uses private memory can be dispatched properly. + * + * Test Description: + * 1) Obtain the list of all agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) For an agent create a queue. 
+ * 3) Query the agent's memory regions and ensure that a region exists that is a
+ *    HSA_SEGMENT_PRIVATE segment type.
+ * 4) Load and initialize the private_memory kernel.
+ * 5) Create a completion signal for dispatch.
+ * 6) Enqueue a dispatch packet, specifying the correct private_segment_size in the
+ *    AQL packet obtained from the hsa_ext_code_descriptor_t structure.
+ * 7) Wait for the kernel to finish executing and verify that it executed correctly.
+ *
+ * Expected Results: The kernel should execute correctly.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * Dispatches the "&__private_memory_kernel" kernel from private_memory.brig on
+ * every agent that reports HSA_AGENT_FEATURE_KERNEL_DISPATCH, passing the
+ * private segment size recorded for the kernel symbol, and then compares
+ * data_out with data_in element by element.
+ *
+ * Agents without a fine grained global region are skipped. Every per-agent
+ * resource (kernarg buffer, data blocks, executable, queue, code object) is
+ * released before the next agent is processed. All checks are made with
+ * ASSERT; the function returns 0 once every agent has been processed.
+ */
+int test_aql_private_memory() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("private_memory.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Check if the queue supports dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Find a memory region in the global segment
+        hsa_region_t global_region;
+        global_region.handle=(uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        if ((uint64_t)-1 == global_region.handle) {
+            // Skip the agent if global fine grained memory isn't available
+            continue;
+        }
+
+        // Find a memory region that supports kernel arguments
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+
+        // Create the queue
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        char* symbol_names[1];
+        symbol_names[0] = "&__private_memory_kernel";
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the data block to be used by the kernel
+        // The size of the data block must be able to fit into one workgroup
+        uint32_t block_size = 128;
+        uint32_t* data_in;
+        uint32_t* data_out;
+        status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_in);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_out);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the kernel argument buffer from the correct region
+        void* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_memory_t), &kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Initialize the data: data_in[i] = i, data_out cleared.
+        // The index is unsigned so it matches the type of block_size.
+        memset(data_out, 0, block_size * sizeof(uint32_t));
+        uint32_t kk;
+        for (kk = 0; kk < block_size; ++kk) {
+            data_in[kk] = kk;
+        }
+
+        // Dispatch with exactly the private segment size recorded for the symbol
+        launch_memory_kernel(queue, data_in, data_out, block_size,
+                             symbol_record.private_segment_size,
+                             0,
+                             symbol_record.kernel_object,
+                             kernarg_buffer);
+
+        // Verify the kernel was executed correctly
+        for (kk = 0; kk < block_size; ++kk) {
+            if (data_in[kk] != data_out[kk]) {
+                ASSERT(0);
+            }
+        }
+
+        // Free the kernarg memory buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the data buffers
+        status = hsa_memory_free(data_in);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(data_out);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object that finalize_executable() handed back;
+        // without this call one code object is left behind per tested agent.
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/aql/test_aql_private_memory_overspecified.c b/src/core/aql/test_aql_private_memory_overspecified.c
new file mode 100644
index 0000000..773d541
--- /dev/null
+++ b/src/core/aql/test_aql_private_memory_overspecified.c
@@ -0,0 +1,221 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: aql_private_memory_overspecified + * Scope: Conformance + * + * Purpose: Verifies that a kernel that uses private memory can be dispatched properly, + * even if the value specified in the aql packet's private_segment_size field is + * greater than the value given in the hsa_ext_code_descriptor_t structure. + * + * Test Description: + * 1) Obtain the list of all agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. 
+ * 2) For an agent create a queue.
+ * 3) Query the agent's memory regions and ensure that a region exists that is a
+ *    HSA_SEGMENT_PRIVATE segment type.
+ * 4) Load and initialize the private_memory kernel.
+ * 5) Create a completion signal for dispatch.
+ * 6) Enqueue a dispatch packet, specifying a value for private_segment_size in the
+ *    AQL packet twice the size given in the hsa_ext_code_descriptor_t structure.
+ * 7) Wait for the kernel to finish executing and verify that it executed correctly.
+ *
+ * Expected Results: The kernel should execute correctly.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * Dispatches the "&__private_memory_kernel" kernel from private_memory.brig on
+ * every agent that reports HSA_AGENT_FEATURE_KERNEL_DISPATCH, passing twice the
+ * private segment size recorded for the kernel symbol, and then compares
+ * data_out with data_in element by element.
+ *
+ * Agents without a fine grained global region are skipped. All checks are made
+ * with ASSERT; the function returns 0 once every agent has been processed.
+ */
+int test_aql_private_memory_overspecified() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("private_memory.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Check if the queue supports dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Find a memory region in the global segment
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        if ((uint64_t)-1 == global_region.handle) {
+            // Skip the agent if global fine grained memory isn't available
+            continue;
+        }
+
+        // Create the queue
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        char* symbol_names[1];
+        symbol_names[0] = "&__private_memory_kernel";
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the data block to be used by the kernel
+        // The size of the data block must be able to fit into one workgroup
+        uint32_t block_size = 128;
+        uint32_t* data_in;
+        uint32_t* data_out;
+        status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_in);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_out);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Find a memory region that supports kernel arguments
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+
+        // Allocate the kernel argument buffer from the correct region
+        void* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_memory_t), &kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Initialize the data: data_in[i] = i, data_out cleared.
+        // The index is unsigned so it matches the type of block_size.
+        memset(data_out, 0, block_size * sizeof(uint32_t));
+        uint32_t kk;
+        for (kk = 0; kk < block_size; ++kk) {
+            data_in[kk] = kk;
+        }
+
+        // Dispatch with double the private segment size recorded for the
+        // symbol; this over-specification is the point of the test.
+        launch_memory_kernel(queue,
+                             data_in,
+                             data_out,
+                             block_size,
+                             2 * symbol_record.private_segment_size,
+                             0,
+                             symbol_record.kernel_object,
+                             kernarg_buffer);
+
+        // Verify the kernel was executed correctly
+        for (kk = 0; kk < block_size; ++kk) {
+            if (data_in[kk] != data_out[kk]) {
+                ASSERT(0);
+            }
+        }
+
+        // Free the kernarg memory buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the data buffers
+        status = hsa_memory_free(data_in);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(data_out);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/aql/test_aql_zero_wg_size.c b/src/core/aql/test_aql_zero_wg_size.c
new file mode 100644
index 0000000..c79afc8
--- /dev/null
+++ b/src/core/aql/test_aql_zero_wg_size.c
@@ -0,0 +1,226 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: aql_zero_wg_size + * Scope: Conformance + * + * Purpose: Verifies that an aql dispatch that specifies a work group size + * of 0 in a valid work group dimension will succeed in executing, but will + * do no meaningful work. + * + * Test Description: + * 1) Obtain the list of all agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) For each agent create a queue. + * 3) Load and initialize the init_data kernel. 
+ * 4) Allocate a memory region that is large enough to represent a 3 dimensional + * init_data kernel of moderate size, possibly 256x1x1. + * 5) Execute several kernels, iterating through the possible values of: + * a) dimensions - 1 to 3 + * b) Work group size per dimension x - 0 if dimensions is 1, 1 otherwise. + * c) Work group size per dimension y - 0 if dimensions is 2, 1 otherwise. + * d) Work group size per dimension z - 0 if dimensions is 3, 1 otherwise. + * e) Use a row_pitch equal to the grid_size[x] + * f) Use a slice_pitch equal to the grid_size[x] * grid_size[y] value + * 6) Verify after each execution that only the the kernel executed (the + * completion signal decrements) but that no part of the memory region was modified. + * + * Expected Result: The executions should complete, but the memory should not + * be modified. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +#define MIN_GRID_SIZE 256 + +int test_aql_zero_wg_size() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("init_data.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Continue if this agent does not support DISPATCH + continue; + } + + // Find a memory region in the global segment + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + // Skip the test if global fine grained 
memory isn't available + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + uint32_t grid_max_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_GRID_MAX_SIZE, &grid_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(grid_max_size > MIN_GRID_SIZE); + + // Allocate the data buffer + uint32_t *data; + status = hsa_memory_allocate(global_region, MIN_GRID_SIZE * sizeof(uint32_t), (void**) &data); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the queues + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__init_int_data_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_t), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == 
status); + + int dim; + hsa_dim3_t grid_dim; + hsa_dim3_t workgroup_dim; + for (dim = 1; dim <= 3; ++dim) { + workgroup_dim.x = (dim == 1) ? 0: 1; + workgroup_dim.y = (dim == 2) ? 0: 1; + workgroup_dim.z = (dim == 3) ? 0: 1; + + grid_dim.x = (dim == 1) ? 0: MIN_GRID_SIZE; + grid_dim.y = (dim == 2) ? 0: MIN_GRID_SIZE; + grid_dim.z = (dim == 3) ? 0: MIN_GRID_SIZE; + + launch_kernel(queue, data, MIN_GRID_SIZE, 1, dim, grid_dim, workgroup_dim, symbol_record.kernel_object, kernarg_buffer); + + // Verify that no data was modified + int jj; + for (jj = 0; jj < MIN_GRID_SIZE; ++jj) { + ASSERT(0 == data[jj]); + } + } + + // Free the kernarg_buffer that was allocated on kernarg_region + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy queues + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the data buffers + status = hsa_memory_free(data); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/aql/test_helper_func.c b/src/core/aql/test_helper_func.c new file mode 100644 index 0000000..baa750b --- /dev/null +++ b/src/core/aql/test_helper_func.c @@ -0,0 +1,188 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +// Clear the data, launch the kernel, and wait for the execution to complete +void launch_kernel( + hsa_queue_t* queue, + uint32_t* data, + uint32_t total_size, + uint32_t value, + int dim, + hsa_dim3_t grid_dim, + hsa_dim3_t workgroup_dim, + uint64_t kernel_obj_address, + void* kernarg_address) { + hsa_status_t status; + + // Clear the data + memset(data, 0, sizeof(uint32_t) * total_size); + + // Setup the kernarg data structure + kernarg_t args; + args.data = data; + args.value = value; + args.row_pitch = grid_dim.x; + args.slice_pitch = grid_dim.x * grid_dim.y; + memcpy((void*)kernarg_address, &args, sizeof(args)); + + // Create a signal with initial value of 1 + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Request a new packet ID + uint64_t packet_id = hsa_queue_add_write_index_acquire(queue, 1); + + while (packet_id - hsa_queue_load_read_index_relaxed(queue) >= queue->size) {} + hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)queue->base_address + + packet_id % queue->size; + + // Initialize the packet + memset(dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet->completion_signal = signal; + dispatch_packet->setup = dim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = (uint16_t)workgroup_dim.x; + dispatch_packet->workgroup_size_y = (uint16_t)workgroup_dim.y; + dispatch_packet->workgroup_size_z = (uint16_t)workgroup_dim.z; + dispatch_packet->grid_size_x = grid_dim.x; + dispatch_packet->grid_size_y = grid_dim.y; + dispatch_packet->grid_size_z = grid_dim.z; + dispatch_packet->kernel_object = kernel_obj_address; + dispatch_packet->kernarg_address = (void*) kernarg_address; + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << 
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + + __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + + // Wait until the kernel completes + while (0 != hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {} + + // Destroy the signal + hsa_signal_destroy(signal); + + return; +} + + +void launch_memory_kernel( + hsa_queue_t* queue, + void* in, + void* out, + uint32_t data_size, + uint32_t private_memory_size, + uint32_t group_memory_size, + uint64_t kernel_obj_address, + void* kernarg_address) { + hsa_status_t status; + + // Clear the data + memset(out, 0, sizeof(int) * data_size); + + // Setup the kernarg + kernarg_memory_t args; + args.in = in; + args.out = out; + args.count = data_size; + memcpy((void*)kernarg_address, &args, sizeof(args)); + + // Create a signal with initial value of 1 + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Request a new packet ID + uint64_t packet_id = hsa_queue_add_write_index_acquire(queue, 1); + + while (packet_id - hsa_queue_load_read_index_relaxed(queue) >= queue->size) {} + + hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)queue->base_address + + packet_id % queue->size; + + // Initialize the packet + memset(dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet->completion_signal = signal; + dispatch_packet->header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet->header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet->setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = (uint16_t)data_size; + dispatch_packet->workgroup_size_y = 
(uint16_t)1; + dispatch_packet->workgroup_size_z = (uint16_t)1; + dispatch_packet->grid_size_x = (uint16_t)data_size; + dispatch_packet->grid_size_y = 1; + dispatch_packet->grid_size_z = 1; + dispatch_packet->private_segment_size = private_memory_size; + dispatch_packet->group_segment_size = group_memory_size; + dispatch_packet->kernel_object = kernel_obj_address; + dispatch_packet->kernarg_address = (void*) kernarg_address; + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + + __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + + // Wait until the kernel complete + while (0 != hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {} + + hsa_signal_destroy(signal); + + return; +} diff --git a/src/core/aql/test_helper_func.h b/src/core/aql/test_helper_func.h new file mode 100644 index 0000000..2ee8e9d --- /dev/null +++ b/src/core/aql/test_helper_func.h @@ -0,0 +1,112 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#ifndef _TEST_HELPER_FUNC_H_ +#define _TEST_HELPER_FUNC_H_ + +#include + +// Structure alignment macro +#ifndef __ALIGNED__ +#if defined(__GNUC__) +#define __ALIGNED__(x) __attribute__((aligned(x))) +#elif defined(_MSC_VER) +#define __ALIGNED__(x) __declspec(align(x)) +#else +#error \ + "Your compiler is not recognized. Add an alignment macro to support it " \ + "or define __ALIGNED__(x) to the proper alignment property." 
+#endif +#endif + +#define HSA_ARGUMENT_ALIGN_BYTES 16 +#define HSA_QUEUE_ALIGN_BYTES 64 +#define HSA_PACKET_ALIGN_BYTES 64 + +#define min(a, b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a < _b ? _a : _b; }) + +typedef struct __attribute__ ((aligned(HSA_ARGUMENT_ALIGN_BYTES))) kernarg_s { + void* data; + uint32_t value; + uint32_t row_pitch; + uint32_t slice_pitch; +} kernarg_t; + + +// void callback_queue_error(hsa_status_t status, hsa_queue_t* queue, void* data); + +// Clear the data, launch the kernel, and wait for the execution to complete +void launch_kernel( + hsa_queue_t* queue, + uint32_t* data, + uint32_t total_size, + uint32_t value, + int dim, + hsa_dim3_t grid_dim, + hsa_dim3_t workgroup_dim, + uint64_t kernel_obj_address, + void* kernarg_address); + +typedef struct __attribute__ ((aligned(16))) kernarg_memory_s { + void* in; + void* out; + int count; +} kernarg_memory_t; + +// Launch a memory kernel (private or group), and wait for the execute to complete +void launch_memory_kernel( + hsa_queue_t* queue, + void* in, + void* out, + uint32_t data_size, + uint32_t private_memory_size, + uint32_t group_memory_size, + uint64_t kernel_obj_address, + void* kernarg_address); + +#endif // _TEST_HELPER_FUNC_H_ diff --git a/src/core/async/hsa_async.c b/src/core/async/hsa_async.c new file mode 100644 index 0000000..ab8c587 --- /dev/null +++ b/src/core/async/hsa_async.c @@ -0,0 +1,64 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
// One DEFINE_TEST per test in the async suite. The macro is not defined in
// this file; presumably it binds each name to the matching test_<name>()
// function declared in hsa_async.h -- confirm against the framework header.
DEFINE_TEST(async_invalid_kernel_object);
DEFINE_TEST(async_invalid_group_memory);
DEFINE_TEST(async_invalid_dimensions);
DEFINE_TEST(async_invalid_workgroup_size);
DEFINE_TEST(async_invalid_packet);

// Entry point of the async test executable: sets up the test suite, adds the
// five tests in the order listed, runs them and returns 0.
// NOTE(review): argc/argv are not referenced directly here; whether the
// framework macros use them cannot be told from this file.
int main(int argc, char* argv[]) {
    INITIALIZE_TESTSUITE();
    ADD_TEST(async_invalid_kernel_object);
    ADD_TEST(async_invalid_group_memory);
    ADD_TEST(async_invalid_dimensions);
    ADD_TEST(async_invalid_workgroup_size);
    ADD_TEST(async_invalid_packet);
    RUN_TESTS();
    return 0;
}
#ifndef _HSA_ASYNC_H_
#define _HSA_ASYNC_H_

// Entry points of the asynchronous error-handling tests. Each takes no
// arguments and returns an int. They are declared with (void) so that the
// compiler treats them as prototypes and rejects calls that pass arguments;
// an empty parameter list in C leaves the parameters unspecified.
extern int test_async_invalid_kernel_object(void);
extern int test_async_invalid_group_memory(void);
extern int test_async_invalid_dimensions(void);
extern int test_async_invalid_workgroup_size(void);
extern int test_async_invalid_packet(void);

#endif  // _HSA_ASYNC_H_
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: async_invalid_dimensions + * Scope: Conformance + * + * Purpose: Verifies that if an aql packet specifies a dimension + * value above 3, the queue's error handling callback will trigger. + * + * Test Description: + * 1) For each agent on the platform that supports kernel dispatch, + * create the maximum number of queues, each with a valid callback. + * 2) Dispatch a packet with a dimension size greater than 3 on each + * queue.
+ * + * Expected Results: The queues callback should trigger, and the queue + * id should be correctly passed to the callback. + * + */ + +#include +#include +#include +#include +#include +#include "test_async_utils.h" + +int test_async_invalid_dimensions() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Setup the dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 4 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 1; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.completion_signal.handle = 0; + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Make sure the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = 
finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__no_op_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Fill in the kernel dispatch information + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.kernarg_address = NULL; + + // Run the test for this agent + async_test(agent_list.agents[ii], &dispatch_packet, HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/async/test_async_invalid_group_memory.c b/src/core/async/test_async_invalid_group_memory.c new file mode 100644 index 0000000..8ce83ff --- /dev/null +++ b/src/core/async/test_async_invalid_group_memory.c @@ -0,0 +1,188 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * 
Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: async_invalid_group_memory + * Scope: Conformance + * + * Purpose: Verifies that if an aql packet specifies an invalid + * group memory segment size, the queue's error handling + * callback will trigger.
+ * + * Test Description: + * 1) For each agent on the platform that supports kernel dispatch, + * create a max queues each with a valid callback. + * 2) Dispatch a packet with a large group memory size to each + * queue. + * + * Expected Results: The queues callback should trigger, and the queue + * id should be correctly passed to the callback. + * + */ + +#include +#include +#include +#include +#include +#include "test_async_utils.h" + +int test_async_invalid_group_memory() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("group_memory.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Setup the dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 1; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.completion_signal.handle = 0; + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Make sure the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region that 
supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__group_memory_static_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(symbol_record.kernarg_segment_size), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + memset(kernarg_buffer, 0, sizeof(symbol_record.kernarg_segment_size)); + + // Fill in the kernel dispatch information + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernarg_address = kernarg_buffer; + + // Request a large group memory segment size + dispatch_packet.group_segment_size = (uint32_t)-1; + + // Run the test for this agent + async_test(agent_list.agents[ii], &dispatch_packet, HSA_STATUS_ERROR_INVALID_ALLOCATION); + + // Free the kernarg + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == 
status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/async/test_async_invalid_kernel_object.c b/src/core/async/test_async_invalid_kernel_object.c new file mode 100644 index 0000000..6169c3a --- /dev/null +++ b/src/core/async/test_async_invalid_kernel_object.c @@ -0,0 +1,120 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: async_invalid_kernel_object + * Scope: Conformance + * + * Purpose: Verifies that if an aql packet specifies an invalid + * kernel object, the queue's error handling callback will trigger. + * + * Test Description: + * 1) For each agent on the platform that supports kernel dispatch, + * create a max queues each with a valid callback. + * 2) Dispatch a packet with an invalid kernel object (NULL) on each + * queue. + * + * Expected Results: The queues callback should trigger, and the queue + * id should be correctly passed to the callback. 
+ * + */ + +#include +#include +#include +#include +#include "test_async_utils.h" + +int test_async_invalid_kernel_object() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Setup the dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 1; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.kernel_object = 0; + dispatch_packet.group_segment_size = 0; + dispatch_packet.private_segment_size = 0; + dispatch_packet.kernarg_address = 0; + dispatch_packet.completion_signal.handle = 0; + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Make sure the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + async_test(agent_list.agents[ii], &dispatch_packet, HSA_STATUS_ERROR_INVALID_CODE_OBJECT); + } + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/async/test_async_invalid_packet.c b/src/core/async/test_async_invalid_packet.c new file 
mode 100644 index 0000000..34dd44c --- /dev/null +++ b/src/core/async/test_async_invalid_packet.c @@ -0,0 +1,104 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: async_invalid_packet + * Scope: Conformance + * + * Purpose: Verifies that if an aql packet is invalid (bad packet type), + * the queue's error handling callback will trigger. + * + * Test Description: + * 1) For each agent on the platform that supports kernel dispatch, + * create a max queues each with a valid callback. + * 2) Dispatch a packet with a invalid packet type. + * + * Expected Results: The queues callback should trigger, and the queue + * id should be correctly passed to the callback. + * + */ + +#include +#include +#include +#include +#include "test_async_utils.h" + +int test_async_invalid_packet() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Setup the dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= -1 << HSA_PACKET_HEADER_TYPE; + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Make sure the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + async_test(agent_list.agents[ii], &dispatch_packet, HSA_STATUS_ERROR_INVALID_PACKET_FORMAT); + } + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/async/test_async_invalid_workgroup_size.c 
b/src/core/async/test_async_invalid_workgroup_size.c new file mode 100644 index 0000000..c65626b --- /dev/null +++ b/src/core/async/test_async_invalid_workgroup_size.c @@ -0,0 +1,175 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: async_invalid_workgroup_size + * Scope: Conformance + * + * Purpose: Verifies that if an aql packet specifies an invalid + * workgroup size, the queue's error handling callback will trigger. + * + * Test Description: + * 1) For each agent on the platform that supports kernel dispatch, + * create a max queues each with a valid callback. + * 2) Dispatch a packet with workgroup size greater than the value + * specified by the agent. Do this on each queue. + * + * Expected Results: The queues callback should trigger, and the queue + * id should be correctly passed to the callback. + * + */ + +#include +#include +#include +#include +#include +#include "test_async_utils.h" + +int test_async_invalid_workgroup_size() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Setup the dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.workgroup_size_x = 0; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_y = 
1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.completion_signal.handle = 0; + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Make sure the agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__no_op_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Fill in the kernel dispatch information + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.kernarg_address = NULL; + + // Run the test for this agent, testing an invalid work group size in all three dimensions + int jj; + for (jj = 1; jj <= 3; ++jj) { + dispatch_packet.setup |= jj << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = (jj == 1) ? (uint16_t)-1 : 1; + dispatch_packet.workgroup_size_y = (jj == 2) ? (uint16_t)-1 : 1; + dispatch_packet.workgroup_size_z = (jj == 3) ? 
(uint16_t)-1 : 1; + async_test(agent_list.agents[ii], &dispatch_packet, HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS); + } + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/async/test_async_utils.c b/src/core/async/test_async_utils.c new file mode 100644 index 0000000..9c9e9db --- /dev/null +++ b/src/core/async/test_async_utils.c @@ -0,0 +1,110 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include "test_async_utils.h" + +hsa_status_t global_status; +hsa_queue_t* global_queue_handle; +hsa_signal_t global_signal; + +void async_callback(hsa_status_t status, hsa_queue_t* queue_handle, void* data) { + global_status = status; + global_queue_handle = queue_handle; + hsa_signal_store_relaxed(global_signal, 1); + return; +} + +void async_test(hsa_agent_t agent, hsa_kernel_dispatch_packet_t* dispatch_packet, hsa_status_t expected_status) { + // Initialize the global signal + hsa_status_t status = hsa_signal_create(0, 0, NULL, &global_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t queue_max; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create queue_max queues for the agent. 
+ int ii; + hsa_queue_t* queue[queue_max]; + for (ii = 0; ii < queue_max; ++ii) { + status = hsa_queue_create(agent, 1024, HSA_QUEUE_TYPE_SINGLE, async_callback, NULL, UINT32_MAX, UINT32_MAX, &queue[ii]); + if (HSA_STATUS_SUCCESS != status) { + ASSERT(HSA_STATUS_ERROR_OUT_OF_RESOURCES == status); + queue_max = ii; + } + } + + for (ii = 0; ii < queue_max; ++ii) { + // Initialize the global variables + global_status = HSA_STATUS_SUCCESS; + global_queue_handle = 0; + hsa_signal_store_relaxed(global_signal, 0); + + // Dispatch the packet. + enqueue_dispatch_packet(queue[ii], dispatch_packet); + + // Wait on the global signal value to change to 1 + while (1 != hsa_signal_wait_relaxed(global_signal, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {} + + // Verify the global_status and global_queue_handle values were set correctly + ASSERT(global_queue_handle == queue[ii]); + ASSERT(expected_status == global_status); + } + + // Destroy the queues + for (ii = 0; ii < queue_max; ++ii) { + status = hsa_queue_destroy(queue[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the global signal + status = hsa_signal_destroy(global_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + return; +} diff --git a/src/core/async/test_async_utils.h b/src/core/async/test_async_utils.h new file mode 100644 index 0000000..6320c32 --- /dev/null +++ b/src/core/async/test_async_utils.h @@ -0,0 +1,53 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
#ifndef _TEST_ASYNC_UTILS_H_
#define _TEST_ASYNC_UTILS_H_

// NOTE(review): this header uses HSA types (hsa_status_t, hsa_queue_t,
// hsa_agent_t, hsa_kernel_dispatch_packet_t) without including their
// declarations, so the includer must bring them into scope first.

// Queue error callback used by async_test(): records the status and queue it
// is called with and sets a shared signal to 1. The data argument is not used.
void async_callback(hsa_status_t status, hsa_queue_t* queue_handle, void* data);

// Creates queues on the agent with async_callback installed, enqueues
// dispatch_packet on each of them, and asserts that the callback received the
// queue that was used together with expected_status.
void async_test(hsa_agent_t agent, hsa_kernel_dispatch_packet_t* dispatch_packet, hsa_status_t expected_status);

#endif // _TEST_ASYNC_UTILS_H_
// NOTE(review): this file also includes one <...> system header whose name was
// lost in this copy of the patch; presumably it supplies the DEFINE_TEST,
// INITIALIZE_TESTSUITE, ADD_TEST and RUN_TESTS macros - restore it from the
// original source.
#include "hsa_code.h"

// One DEFINE_TEST per test case of the "code" suite. The names match the
// test_<name>() functions declared in hsa_code.h (presumably the macro wraps
// those functions - confirm against the macro definition).
DEFINE_TEST(code_define_global_agent);
DEFINE_TEST(code_define_global_program);
DEFINE_TEST(code_define_readonly_agent);
DEFINE_TEST(code_mixed_scope);
DEFINE_TEST(code_module_scope_symbol);
DEFINE_TEST(code_program_scope_symbol);
DEFINE_TEST(code_multiple_executables);
DEFINE_TEST(code_serialize_deserialize);
DEFINE_TEST(code_iterate_symbols);
DEFINE_TEST(code_kernarg_alignment);
DEFINE_TEST(code_recursive_kernel_function);

// Entry point of the "code" test executable: sets up the suite, registers
// every test defined above in the same order, and runs them. argc and argv
// are not referenced directly here (the macros may use them - TODO confirm).
int main(int argc, char* argv[]) {
    INITIALIZE_TESTSUITE();
    ADD_TEST(code_define_global_agent);
    ADD_TEST(code_define_global_program);
    ADD_TEST(code_define_readonly_agent);
    ADD_TEST(code_mixed_scope);
    ADD_TEST(code_module_scope_symbol);
    ADD_TEST(code_program_scope_symbol);
    ADD_TEST(code_multiple_executables);
    ADD_TEST(code_serialize_deserialize);
    ADD_TEST(code_iterate_symbols);
    ADD_TEST(code_kernarg_alignment);
    ADD_TEST(code_recursive_kernel_function);
    // NOTE(review): there is no explicit return; presumably RUN_TESTS()
    // produces the exit status - confirm against the macro definition.
    RUN_TESTS();
}
+ * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
#ifndef _HSA_CODE_H_
#define _HSA_CODE_H_

/*
 * Entry points of the "code" conformance tests registered in hsa_code.c.
 *
 * Each function takes no arguments and returns an int (presumably 0 on
 * success, like the test bodies visible elsewhere in the suite - confirm).
 * They are declared with (void) so that they are real prototypes: in C an
 * empty parameter list leaves the parameters unspecified and disables
 * argument checking.
 */
extern int test_code_define_global_agent(void);
extern int test_code_define_global_program(void);
extern int test_code_define_readonly_agent(void);
extern int test_code_mixed_scope(void);
extern int test_code_module_scope_symbol(void);
extern int test_code_program_scope_symbol(void);
extern int test_code_multiple_executables(void);
extern int test_code_serialize_deserialize(void);
extern int test_code_iterate_symbols(void);
extern int test_code_kernarg_alignment(void);
extern int test_code_recursive_kernel_function(void);

#endif // _HSA_CODE_H_
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: test_code_define_global_agent + * Scope: Conformance + * + * Purpose: Verify that a global variable with program allocation can + * be defined for an executable and used during a dispatch. + * + * + * Test Description: + * 1. Create a code object by loading the global_variable Brig module + * and finalize the module. + * 2. For each agent that supports kernel dispatch: + * a) Create an executable and load the code object. + * b) Define the global object using the hsa_executable_agent_global_variable_define + * API with a valid name and address. + * c) Extract a kernel that uses the global variable. + * d) Execute the kernel on the agent. + * + * Expected Results: The kernel should execute correctly on each agent and the global + * variable should be accessed correctly. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_code_define_global_agent() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. + if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("global_agent_vector_copy.brig", &module)); + + // Get a list of agents, and iterate throught the list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Skip if this agent does not support kernel dispatch + uint32_t feature = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS == status); + if (HSA_AGENT_FEATURE_KERNEL_DISPATCH != feature) { + continue; + } + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Get machine model and profile to create a program + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS 
== status); + + hsa_code_object_t code_object; + hsa_executable_t executable; + hsa_code_object_type_t code_object_type = HSA_CODE_OBJECT_TYPE_PROGRAM; + int32_t call_convention = 0; + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + // Create the program + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + ASSERT(HSA_STATUS_SUCCESS == status); + // Add the brig modules to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + // Finalize the program and extract the code object + status = pfn.hsa_ext_program_finalize(program, agent_isa, call_convention, control_directives, "", code_object_type, &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the empty executable + status = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + const uint32_t data_size = 256; + // Allocate a global data array + uint32_t* global_data = (uint32_t*)malloc(data_size * sizeof(uint32_t)); + memset(global_data, 0, data_size * sizeof(uint32_t)); + + // Define this global variable + status = hsa_executable_agent_global_variable_define(executable, agent_list.agents[ii], "&b", (void*)global_data); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the code object + status = hsa_executable_load_code_object(executable, agent_list.agents[ii], code_object, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Freeze the executable; it can now be queried for symbols + status = hsa_executable_freeze(executable, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Releasing the program should not affect the executable + pfn.hsa_ext_program_destroy(program); + + // Create a queue for dispatching + hsa_queue_t* queue; + status = 
hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate data to be used as the input of the kernel + uint32_t* data_input; + data_input = (uint32_t*)malloc(sizeof(uint32_t) * data_size); + // Initialize the data + uint32_t jj; + for (jj = 0; jj < data_size; ++jj) { + data_input[jj] = jj; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(uint32_t*), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + // Setup the kernarg buffer + memcpy((void*)kernarg_buffer, &data_input, sizeof(uint32_t*)); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + char* symbol_names[1]; + symbol_names[0] = "&__global_vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Launch the vector_copy kernel + dispatch_kernel_1d_data(queue, data_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer); + + // Verify the output data block is updated + int cmp_results = memcmp(data_input, global_data, sizeof(uint32_t) * data_size); + ASSERT(0 == cmp_results); + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the global data + free(global_data); + + // Free the input data + free(data_input); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the 
executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free_agent_list(&agent_list); + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/code/test_code_define_global_program.c b/src/core/code/test_code_define_global_program.c new file mode 100644 index 0000000..6e5d74f --- /dev/null +++ b/src/core/code/test_code_define_global_program.c @@ -0,0 +1,246 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: test_code_define_global_program + * Scope: Conformance + * + * Purpose: Verify that a global variable with program allocation can + * be defined for an executable and used during a dispatch. + * + * + * Test Description: + * 1. Create a code object by loading the global_variable Brig module + and finalize the module. + * 2. For each agent that supports kernel dispatch: + * a) Create an executable and load the code object. + * b) Define the global object using the hsa_executable_global_variable_define + * API with a valid name and address. Do not specify the agent. + * c) Extract a kernel that uses the global variable. + * d) Execute the kernel on the agent. + * + * Expected Results: The kernel should execute correctly on each agent and the global + * variable should be accessed correctly. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_code_define_global_program() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("global_vector_copy.brig", &module)); + + // Get a list of agents, and iterate throught the list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Skip if this agent does not support kernel dispatch + uint32_t feature = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS == status); + if (HSA_AGENT_FEATURE_KERNEL_DISPATCH != feature) { + continue; + } + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Get machine model and profile to create a program + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_code_object_t code_object; + hsa_executable_t executable; + hsa_code_object_type_t code_object_type = HSA_CODE_OBJECT_TYPE_PROGRAM; + int32_t call_convention = 0; + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + // Create the program + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, 
default_float_rounding_mode, NULL, &program); + ASSERT(HSA_STATUS_SUCCESS == status); + // Add the brig modules to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + // Finalize the program and extract the code object + status = pfn.hsa_ext_program_finalize(program, agent_isa, call_convention, control_directives, "", code_object_type, &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + // Create the empty executable + status = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + const uint32_t data_size = 256; + // Allocate a global data array + uint32_t* global_data = (uint32_t*)malloc(data_size * sizeof(uint32_t)); + memset(global_data, 0, data_size * sizeof(uint32_t)); + + // Define this global variable + status = hsa_executable_global_variable_define(executable, "&b", (void*)global_data); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the code object + status = hsa_executable_load_code_object(executable, agent_list.agents[ii], code_object, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Freeze the executable; it can now be queried for symbols + status = hsa_executable_freeze(executable, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Releasing the program should not affect the executable + pfn.hsa_ext_program_destroy(program); + + // Create a queue for dispatching + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate data to be used as the input of the kernel + uint32_t* data_input; + data_input = (uint32_t*)malloc(sizeof(uint32_t) * data_size); + // Initialize the data + uint32_t jj; + for (jj = 0; jj < data_size; ++jj) { + data_input[jj] = jj; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = 
(uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(uint32_t*), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + // Setup the kernarg buffer + memcpy((void*)kernarg_buffer, &data_input, sizeof(uint32_t*)); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + char* symbol_names[1]; + symbol_names[0] = "&__global_vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Launch the vector_copy kernel + dispatch_kernel_1d_data(queue, data_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer); + + // Verify the output data block is updated + int cmp_results = memcmp(data_input, global_data, sizeof(uint32_t) * data_size); + ASSERT(0 == cmp_results); + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the global data + free(global_data); + + // Free the input data + free(data_input); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free_agent_list(&agent_list); + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/code/test_code_define_readonly_agent.c 
b/src/core/code/test_code_define_readonly_agent.c new file mode 100644 index 0000000..5d06895 --- /dev/null +++ b/src/core/code/test_code_define_readonly_agent.c @@ -0,0 +1,247 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: test_code_define_readonly_agent + * Scope: Conformance + * + * Purpose: Verify that an external read-only variable with can + * be defined for an executable and used during a dispatch. + * + * + * Test Description: + * 1. Create a code object by loading the readonly_variable Brig module + and finalize the module. + * 2. For each agent that supports kernel dispatch: + * a) Create an executable and load the code object. + * b) Define the global object using the hsa_executable_readonly_variable_define + * API with a valid name and address. + * c) Extract a kernel that uses the readonly variable. + * d) Execute the kernel on the agent. + * + * Expected Results: The kernel should execute correctly on each agent and the readonly + * variable should be accessed correctly. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_helper_func.h" + + +int test_code_define_readonly_agent() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("readonly_vector_copy.brig", &module)); + + // Get a list of agents, and iterate throught the list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Skip if this agent does not support kernel dispatch + uint32_t feature = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS == status); + if (HSA_AGENT_FEATURE_KERNEL_DISPATCH != feature) { + continue; + } + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Get machine model and profile to create a program + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_code_object_t code_object; + hsa_executable_t executable; + hsa_code_object_type_t code_object_type = HSA_CODE_OBJECT_TYPE_PROGRAM; + int32_t call_convention = 0; + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + // Create the program + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, 
default_float_rounding_mode, NULL, &program); + ASSERT(HSA_STATUS_SUCCESS == status); + // Add the brig modules to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + // Finalize the program and extract the code object + status = pfn.hsa_ext_program_finalize(program, agent_isa, call_convention, control_directives, "", code_object_type, &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + // Create the empty executable + status = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + const uint32_t data_size = 256; + uint32_t* global_data; + global_data = malloc(sizeof(uint32_t) * data_size); + // Initialize the global data + uint32_t jj; + for (jj = 0; jj < data_size; ++jj) { + global_data[jj] = jj; + } + + // Define this global variable + status = hsa_executable_readonly_variable_define(executable, agent_list.agents[ii], "&a", (void*)global_data); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the code object + status = hsa_executable_load_code_object(executable, agent_list.agents[ii], code_object, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Freeze the executable; it can now be queried for symbols + status = hsa_executable_freeze(executable, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Releasing the program should not affect the executable + pfn.hsa_ext_program_destroy(program); + + // Create a queue for dispatching + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate data to be used as the output of the kernel + uint32_t* data_output; + data_output = (uint32_t*)malloc(sizeof(uint32_t) * data_size); + memset(data_output, 0, sizeof(uint32_t) * data_size); + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + 
kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(uint32_t*), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + // Setup the kernarg buffer + memcpy((void*)kernarg_buffer, &data_output, sizeof(uint32_t*)); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + char* symbol_names[1]; + symbol_names[0] = "&__readonly_vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Launch the vector_copy kernel + dispatch_kernel_1d_data(queue, data_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer); + + // Verify the output data block is updated + int cmp_results = memcmp(data_output, global_data, sizeof(uint32_t) * data_size); + ASSERT(0 == cmp_results); + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the global data + free(global_data); + + // Free the output data + free(data_output); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free_agent_list(&agent_list); + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/code/test_code_iterate_symbols.c 
b/src/core/code/test_code_iterate_symbols.c new file mode 100644 index 0000000..b8e348a --- /dev/null +++ b/src/core/code/test_code_iterate_symbols.c @@ -0,0 +1,485 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: test_code_iterate_symbols + * Scope: Conformance + * + * Purpose: Iterate through all code object and executable symbols, and + * apply hsa_code_object_symbol_get_info() on each symbol in the + * code object and executable exposed by the symbol iteration callback. + * + * Test Description: + * 1. Create a code object by finalizing the vector_copy kernel. + * 2. For each agent that supports kernel dispatch, create + * an executable and load the code object. + * 3. Iterate through all code symbols by calling hsa_code_object_iterate_symbols(). + * 4. Within the callback of the iteration, query all symbol information. + * 5. For each executable, iterate through all symbols by calling + * hsa_executable_iterate_symbols(). + * 6. Within the callback, query all symbol information. + * 7. Compare the symbol information obtained from the code object and the executable, + * and ensure it is consistent. + * + * Expected Result: All symbols that were not added by an executable define call should + * be reported by both the code object and the executable. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +typedef struct symbol_info_s { + hsa_symbol_kind_t symbol; + uint32_t name_length; + char name[256]; + uint32_t module_name_length; + char module_name[256]; + hsa_variable_allocation_t variable_allocation; + hsa_variable_segment_t variable_segment; + uint32_t variable_alignment; + uint32_t variable_size; + bool variable_is_const; + uint32_t kernel_kernarg_segment_size; + uint32_t kernel_kernarg_segment_alignment; + uint32_t kernel_group_segment_size; + uint32_t kernel_private_segment_size; + uint32_t indirect_function_call_convention; +} symbol_info_t; + +uint32_t exe_symbol_index; +uint32_t code_symbol_index; + +hsa_status_t callback_count_executable_symbols(hsa_executable_t exe, + hsa_executable_symbol_t symbol, + void* data) { + uint32_t* count = (uint32_t*)data; + (*count) += 1; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t callback_count_code_symbols(hsa_code_object_t code_object, + hsa_code_symbol_t symbol, + void* data) { + uint32_t* count = (uint32_t*)data; + (*count) += 1; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t callback_fill_executable_symbol_info(hsa_executable_t exe, + hsa_executable_symbol_t exe_symbol, + void* data) { + hsa_status_t status; + + // Get the executable status + hsa_executable_state_t exe_state; + status = hsa_executable_get_info(exe, HSA_EXECUTABLE_INFO_STATE, &exe_state); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_symbol_kind_t type; + uint32_t name_length; + uint32_t module_name_length; + hsa_agent_t agent; + uint64_t variable_address; + hsa_symbol_linkage_t linkage; + hsa_variable_allocation_t variable_allocation; + hsa_variable_segment_t variable_segment; + uint32_t variable_alignment; + uint32_t variable_size; + bool variable_is_const; + uint64_t kernel_object; + uint32_t kernel_kernarg_segment_size; + uint32_t kernel_kernarg_segment_alignment; + uint32_t kernel_group_segment_size; + uint32_t 
kernel_private_segment_size; + bool kernel_dynamic_callstack; + uint64_t indirect_function_object; + uint32_t indirect_function_call_convention; + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &type); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's type.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &name_length); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's name length.\n"); + ASSERT(name_length > 0); + + char name[name_length + 1]; + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &name); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's name.\n"); + name[name_length] = '\0'; + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH, &module_name_length); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's model name length.\n"); + + char module_name[module_name_length]; + if (module_name_length > 0) { + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME, &module_name); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's module name.\n"); + module_name[module_name_length] = '\0'; + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE, &linkage); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's linkage.\n"); + + if (HSA_SYMBOL_KIND_VARIABLE == type) { + if (HSA_SYMBOL_KIND_VARIABLE == type && HSA_VARIABLE_ALLOCATION_AGENT == variable_allocation) { + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_AGENT, &agent); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's agent.\n"); + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, 
&variable_address); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable address.\n"); + + if (HSA_EXECUTABLE_STATE_UNFROZEN == exe_state) { + ASSERT(0 == variable_address); + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION, &variable_allocation); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable allocation.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT, &variable_segment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable segment.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT, &variable_alignment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's vairable alignment.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &variable_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable size.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST, &variable_is_const); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's variable is const.\n"); + } + + if (HSA_SYMBOL_KIND_KERNEL == type) { + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel_object); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel object.\n"); + if (HSA_EXECUTABLE_STATE_UNFROZEN == exe_state) { + ASSERT(0 == kernel_object); + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernel_kernarg_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel kernarg segment size.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, 
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &kernel_kernarg_segment_alignment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel kernarg segment alignment.\n"); + ASSERT(kernel_kernarg_segment_alignment <= 16); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &kernel_group_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel group segment size.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kernel_private_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel private segment size.\n"); + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK, &kernel_dynamic_callstack); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's kernel dynamic callstack.\n"); + } + + if (HSA_SYMBOL_KIND_INDIRECT_FUNCTION == type) { + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT, &indirect_function_object); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's indirect function object.\n"); + if (HSA_EXECUTABLE_STATE_UNFROZEN == exe_state) { + ASSERT(0 == indirect_function_object); + } + + status = hsa_executable_symbol_get_info(exe_symbol, HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION, &indirect_function_call_convention); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get executable symbol's indirect function call convention.\n"); + } + + symbol_info_t* symbol_info = (symbol_info_t*)data; + symbol_info += exe_symbol_index; + ++exe_symbol_index; + + symbol_info->symbol = type; + symbol_info->name_length = name_length; + strcmp(symbol_info->name, name); + symbol_info->module_name_length = module_name_length; + if (module_name_length > 0) { + 
strcmp(symbol_info->module_name, module_name); + } + if (HSA_SYMBOL_KIND_VARIABLE == type) { + symbol_info->variable_allocation = variable_allocation; + symbol_info->variable_segment = variable_segment; + symbol_info->variable_size = variable_size; + symbol_info->variable_alignment = variable_alignment; + symbol_info->variable_is_const = variable_is_const; + } + if (HSA_SYMBOL_KIND_KERNEL == type) { + symbol_info->kernel_kernarg_segment_size = kernel_kernarg_segment_size; + symbol_info->kernel_kernarg_segment_alignment = kernel_kernarg_segment_alignment; + symbol_info->kernel_group_segment_size = kernel_group_segment_size; + symbol_info->kernel_private_segment_size = kernel_private_segment_size; + } + if (HSA_SYMBOL_KIND_INDIRECT_FUNCTION == type) { + symbol_info->indirect_function_call_convention = indirect_function_call_convention; + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t callback_fill_code_symbol_info(hsa_code_object_t code_object, + hsa_code_symbol_t code_symbol, + void* data) { + hsa_status_t status; + hsa_symbol_kind_t type; + uint32_t name_length; + uint32_t module_name_length; + hsa_symbol_linkage_t linkage; + hsa_variable_allocation_t variable_allocation; + hsa_variable_segment_t variable_segment; + uint32_t variable_alignment; + uint32_t variable_size; + bool variable_is_const; + uint32_t kernel_kernarg_segment_size; + uint32_t kernel_kernarg_segment_alignment; + uint32_t kernel_group_segment_size; + uint32_t kernel_private_segment_size; + bool kernel_dynamic_callstack; + uint32_t indirect_function_call_convention; + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_TYPE, &type); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's type.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_NAME_LENGTH, &name_length); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's name length.\n"); + ASSERT(name_length > 0); + + char name[name_length + 1]; + status = 
hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_NAME, &name); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's name.\n"); + name[name_length] = '\0'; + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH, &module_name_length); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's model name length.\n"); + + char module_name[module_name_length]; + if (module_name_length > 0) { + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_MODULE_NAME, &module_name); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's module name.\n"); + module_name[module_name_length] = '\0'; + } + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_LINKAGE, &linkage); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's likage.\n"); + + switch (type) { + case HSA_SYMBOL_KIND_VARIABLE: + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION, &variable_allocation); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable alloation.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT, &variable_segment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable segment.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT, &variable_alignment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable alignment.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE, &variable_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable size.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST, &variable_is_const); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's variable is const.\n"); + break; + + case 
HSA_SYMBOL_KIND_KERNEL: + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernel_kernarg_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernel segment size.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &kernel_kernarg_segment_alignment); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernarg segment alignment.\n"); + ASSERT(kernel_kernarg_segment_alignment <= 16); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &kernel_group_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernel group segment size.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kernel_private_segment_size); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernel private group size.\n"); + + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK, &kernel_dynamic_callstack); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's kernel dynamic callstack.\n"); + break; + + case HSA_SYMBOL_KIND_INDIRECT_FUNCTION: + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION, &indirect_function_call_convention); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get code symbol's indirect function call convention.\n"); + break; + default: + break; + } + + symbol_info_t* symbol_info = (symbol_info_t*)data; + symbol_info += code_symbol_index; + ++code_symbol_index; + + symbol_info->symbol = type; + symbol_info->name_length = name_length; + strcmp(symbol_info->name, name); + symbol_info->module_name_length = module_name_length; + if (module_name_length > 0) { + strcmp(symbol_info->module_name, module_name); + } + if (HSA_SYMBOL_KIND_VARIABLE == type) { + 
symbol_info->variable_allocation = variable_allocation; + symbol_info->variable_segment = variable_segment; + symbol_info->variable_size = variable_size; + symbol_info->variable_alignment = variable_alignment; + symbol_info->variable_is_const = variable_is_const; + } + if (HSA_SYMBOL_KIND_KERNEL == type) { + symbol_info->kernel_kernarg_segment_size = kernel_kernarg_segment_size; + symbol_info->kernel_kernarg_segment_alignment = kernel_kernarg_segment_alignment; + symbol_info->kernel_group_segment_size = kernel_group_segment_size; + symbol_info->kernel_private_segment_size = kernel_private_segment_size; + } + if (HSA_SYMBOL_KIND_INDIRECT_FUNCTION == type) { + symbol_info->indirect_function_call_convention = indirect_function_call_convention; + } + + return HSA_STATUS_SUCCESS; +} + +int test_code_iterate_symbols() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("vector_copy.brig", &module)); + + // Get a list of agents, and iterate throught the list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Skip if this agent does not support kernel dispatch + uint32_t feature = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS == status); + if (HSA_AGENT_FEATURE_KERNEL_DISPATCH != feature) { + continue; + } + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Get machine model and profile to create a program + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + 
hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the module and get the code object & executable + hsa_code_object_t code_object; + hsa_executable_t executable; + hsa_code_object_type_t code_object_type = HSA_CODE_OBJECT_TYPE_PROGRAM; + int32_t call_convention = 0; + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + machine_model, + profile, + default_float_rounding_mode, + code_object_type, + call_convention, + control_directives, + &code_object, + &executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t count_exe_symbols = 0; + uint32_t count_code_symbols = 0; + status = hsa_code_object_iterate_symbols(code_object, callback_count_code_symbols, &count_code_symbols); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_iterate_symbols(executable, callback_count_executable_symbols, &count_exe_symbols); + ASSERT(HSA_STATUS_SUCCESS == status); + + size_t symbol_info_total_size = sizeof(symbol_info_t) * count_exe_symbols; + symbol_info_t* exe_symbol_info = (symbol_info_t*)malloc(symbol_info_total_size); + symbol_info_t* code_symbol_info= (symbol_info_t*)malloc(symbol_info_total_size); + memset(exe_symbol_info, 0, symbol_info_total_size); + memset(code_symbol_info, 0, symbol_info_total_size); + + status = hsa_code_object_iterate_symbols(code_object, callback_fill_code_symbol_info, code_symbol_info); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_iterate_symbols(executable, callback_fill_executable_symbol_info, exe_symbol_info); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + int cmp = memcmp(exe_symbol_info, code_symbol_info, symbol_info_total_size); + ASSERT(0 == cmp); + + free(exe_symbol_info); + free(code_symbol_info); + + // Destroy the executable, and the code object + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free_agent_list(&agent_list); + + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/code/test_code_kernarg_alignment.c b/src/core/code/test_code_kernarg_alignment.c new file mode 100755 index 0000000..db95bfe --- /dev/null +++ b/src/core/code/test_code_kernarg_alignment.c @@ -0,0 +1,405 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: test_code_kernarg_alignment + * Scope: Conformance + * + * Purpose: Ensures that symbols declared with module scope + * and program scope can be extracted from a code object + * and an executable and, if the symbols are kernels, + * dispatched to a user mode queue. + * + * Test Description: + * 1. Create a code object by loading the kernarg_align Brig module, + * adding it to a program and finalizing it. + * 2. Use the hsa_code_object_get_symbol API and the kernels' module + * qualified and program scope symbol name to obtain the + * code object's associated symbol. + * 3. Create an executable for each agent that supports kernel dispatch + * using the code object. + * 4. Use the hsa_executable_get_symbol API and the kernels' module qualified + * and program scope symbol names to obtain the executable's associated symbol. + * 5. Each kernel uses a different kernarg segment alignment. + * 6. Execute the kernels on the agent. + * + * Expected Result: The kernels should execute correctly on each agent. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DATA_SIZE 256 +#define FIRST_ARG 2 +#define SECOND_ARG 3 +#define EXPECTED_OUTPUT 5 + +const int num_kernels = 144; + +char* kernarg_align_symbol_names[] = { + "&__kernarg_8_u64_8_u64_kernel", + "&__kernarg_8_u64_16_u64_kernel", + "&__kernarg_8_u64_32_u64_kernel", + "&__kernarg_8_u64_64_u64_kernel", + "&__kernarg_8_u64_128_u64_kernel", + "&__kernarg_8_u64_256_u64_kernel", + "&__kernarg_16_u64_8_u64_kernel", + "&__kernarg_16_u64_16_u64_kernel", + "&__kernarg_16_u64_32_u64_kernel", + "&__kernarg_16_u64_64_u64_kernel", + "&__kernarg_16_u64_128_u64_kernel", + "&__kernarg_16_u64_256_u64_kernel", + "&__kernarg_32_u64_8_u64_kernel", + "&__kernarg_32_u64_16_u64_kernel", + "&__kernarg_32_u64_32_u64_kernel", + "&__kernarg_32_u64_64_u64_kernel", + "&__kernarg_32_u64_128_u64_kernel", + "&__kernarg_32_u64_256_u64_kernel", + "&__kernarg_64_u64_8_u64_kernel", + "&__kernarg_64_u64_16_u64_kernel", + "&__kernarg_64_u64_32_u64_kernel", + "&__kernarg_64_u64_64_u64_kernel", + "&__kernarg_64_u64_128_u64_kernel", + "&__kernarg_64_u64_256_u64_kernel", + "&__kernarg_128_u64_8_u64_kernel", + "&__kernarg_128_u64_16_u64_kernel", + "&__kernarg_128_u64_32_u64_kernel", + "&__kernarg_128_u64_64_u64_kernel", + "&__kernarg_128_u64_128_u64_kernel", + "&__kernarg_128_u64_256_u64_kernel", + "&__kernarg_256_u64_8_u64_kernel", + "&__kernarg_256_u64_16_u64_kernel", + "&__kernarg_256_u64_32_u64_kernel", + "&__kernarg_256_u64_64_u64_kernel", + "&__kernarg_256_u64_128_u64_kernel", + "&__kernarg_256_u64_256_u64_kernel", + "&__kernarg_8_u64_8_u32_kernel", + "&__kernarg_8_u64_16_u32_kernel", + "&__kernarg_8_u64_32_u32_kernel", + "&__kernarg_8_u64_64_u32_kernel", + "&__kernarg_8_u64_128_u32_kernel", + "&__kernarg_8_u64_256_u32_kernel", + "&__kernarg_16_u64_8_u32_kernel", + "&__kernarg_16_u64_16_u32_kernel", + "&__kernarg_16_u64_32_u32_kernel", + "&__kernarg_16_u64_64_u32_kernel", + 
"&__kernarg_16_u64_128_u32_kernel", + "&__kernarg_16_u64_256_u32_kernel", + "&__kernarg_32_u64_8_u32_kernel", + "&__kernarg_32_u64_16_u32_kernel", + "&__kernarg_32_u64_32_u32_kernel", + "&__kernarg_32_u64_64_u32_kernel", + "&__kernarg_32_u64_128_u32_kernel", + "&__kernarg_32_u64_256_u32_kernel", + "&__kernarg_64_u64_8_u32_kernel", + "&__kernarg_64_u64_16_u32_kernel", + "&__kernarg_64_u64_32_u32_kernel", + "&__kernarg_64_u64_64_u32_kernel", + "&__kernarg_64_u64_128_u32_kernel", + "&__kernarg_64_u64_256_u32_kernel", + "&__kernarg_128_u64_8_u32_kernel", + "&__kernarg_128_u64_16_u32_kernel", + "&__kernarg_128_u64_32_u32_kernel", + "&__kernarg_128_u64_64_u32_kernel", + "&__kernarg_128_u64_128_u32_kernel", + "&__kernarg_128_u64_256_u32_kernel", + "&__kernarg_256_u64_8_u32_kernel", + "&__kernarg_256_u64_16_u32_kernel", + "&__kernarg_256_u64_32_u32_kernel", + "&__kernarg_256_u64_64_u32_kernel", + "&__kernarg_256_u64_128_u32_kernel", + "&__kernarg_256_u64_256_u32_kernel", + "&__kernarg_8_u32_8_u64_kernel", + "&__kernarg_8_u32_16_u64_kernel", + "&__kernarg_8_u32_32_u64_kernel", + "&__kernarg_8_u32_64_u64_kernel", + "&__kernarg_8_u32_128_u64_kernel", + "&__kernarg_8_u32_256_u64_kernel", + "&__kernarg_16_u32_8_u64_kernel", + "&__kernarg_16_u32_16_u64_kernel", + "&__kernarg_16_u32_32_u64_kernel", + "&__kernarg_16_u32_64_u64_kernel", + "&__kernarg_16_u32_128_u64_kernel", + "&__kernarg_16_u32_256_u64_kernel", + "&__kernarg_32_u32_8_u64_kernel", + "&__kernarg_32_u32_16_u64_kernel", + "&__kernarg_32_u32_32_u64_kernel", + "&__kernarg_32_u32_64_u64_kernel", + "&__kernarg_32_u32_128_u64_kernel", + "&__kernarg_32_u32_256_u64_kernel", + "&__kernarg_64_u32_8_u64_kernel", + "&__kernarg_64_u32_16_u64_kernel", + "&__kernarg_64_u32_32_u64_kernel", + "&__kernarg_64_u32_64_u64_kernel", + "&__kernarg_64_u32_128_u64_kernel", + "&__kernarg_64_u32_256_u64_kernel", + "&__kernarg_128_u32_8_u64_kernel", + "&__kernarg_128_u32_16_u64_kernel", + "&__kernarg_128_u32_32_u64_kernel", + 
"&__kernarg_128_u32_64_u64_kernel", + "&__kernarg_128_u32_128_u64_kernel", + "&__kernarg_128_u32_256_u64_kernel", + "&__kernarg_256_u32_8_u64_kernel", + "&__kernarg_256_u32_16_u64_kernel", + "&__kernarg_256_u32_32_u64_kernel", + "&__kernarg_256_u32_64_u64_kernel", + "&__kernarg_256_u32_128_u64_kernel", + "&__kernarg_256_u32_256_u64_kernel", + "&__kernarg_8_u32_8_u32_kernel", + "&__kernarg_8_u32_16_u32_kernel", + "&__kernarg_8_u32_32_u32_kernel", + "&__kernarg_8_u32_64_u32_kernel", + "&__kernarg_8_u32_128_u32_kernel", + "&__kernarg_8_u32_256_u32_kernel", + "&__kernarg_16_u32_8_u32_kernel", + "&__kernarg_16_u32_16_u32_kernel", + "&__kernarg_16_u32_32_u32_kernel", + "&__kernarg_16_u32_64_u32_kernel", + "&__kernarg_16_u32_128_u32_kernel", + "&__kernarg_16_u32_256_u32_kernel", + "&__kernarg_32_u32_8_u32_kernel", + "&__kernarg_32_u32_16_u32_kernel", + "&__kernarg_32_u32_32_u32_kernel", + "&__kernarg_32_u32_64_u32_kernel", + "&__kernarg_32_u32_128_u32_kernel", + "&__kernarg_32_u32_256_u32_kernel", + "&__kernarg_64_u32_8_u32_kernel", + "&__kernarg_64_u32_16_u32_kernel", + "&__kernarg_64_u32_32_u32_kernel", + "&__kernarg_64_u32_64_u32_kernel", + "&__kernarg_64_u32_128_u32_kernel", + "&__kernarg_64_u32_256_u32_kernel", + "&__kernarg_128_u32_8_u32_kernel", + "&__kernarg_128_u32_16_u32_kernel", + "&__kernarg_128_u32_32_u32_kernel", + "&__kernarg_128_u32_64_u32_kernel", + "&__kernarg_128_u32_128_u32_kernel", + "&__kernarg_128_u32_256_u32_kernel", + "&__kernarg_256_u32_8_u32_kernel", + "&__kernarg_256_u32_16_u32_kernel", + "&__kernarg_256_u32_32_u32_kernel", + "&__kernarg_256_u32_64_u32_kernel", + "&__kernarg_256_u32_128_u32_kernel", + "&__kernarg_256_u32_256_u32_kernel", +}; + +const uint32_t first_offset[] = 
{8,8,8,8,8,8,16,16,16,16,16,16,32,32,32,32,32,32,64,64,64,64,64,64,128,128,128,128,128,128,256,256,256,256,256,256,8,8,8,8,8,8,16,16,16,16,16,16,32,32,32,32,32,32,64,64,64,64,64,64,128,128,128,128,128,128,256,256,256,256,256,256,8,8,8,8,8,8,16,16,16,16,16,16,32,32,32,32,32,32,64,64,64,64,64,64,128,128,128,128,128,128,256,256,256,256,256,256,8,8,8,8,8,8,16,16,16,16,16,16,32,32,32,32,32,32,64,64,64,64,64,64,128,128,128,128,128,128,256,256,256,256,256,256}; + +const uint32_t second_offset[] = {16,16,32,64,128,256,24,32,32,64,128,256,40,48,64,64,128,256,72,80,96,128,128,256,136,144,160,192,256,256,264,272,288,320,384,512,16,16,32,64,128,256,24,32,32,64,128,256,40,48,64,64,128,256,72,80,96,128,128,256,136,144,160,192,256,256,264,272,288,320,384,512,16,16,32,64,128,256,24,32,32,64,128,256,40,48,64,64,128,256,72,80,96,128,128,256,136,144,160,192,256,256,264,272,288,320,384,512,16,16,32,64,128,256,24,32,32,64,128,256,40,48,64,64,128,256,72,80,96,128,128,256,136,144,160,192,256,256,264,272,288,320,384,512}; + +const size_t first_arg_size[] = {8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4}; + +const size_t second_arg_size[] = {8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4}; + +// All sizes must be a multiple of 16 +const uint32_t kernarg_size[] = 
{32,32,48,80,144,272,32,48,48,80,144,272,48,64,80,80,144,272,80,96,112,144,144,272,144,160,176,208,272,272,272,288,304,336,400,528,32,32,48,80,144,272,32,48,48,80,144,272,48,64,80,80,144,272,80,96,112,144,144,272,144,160,176,208,272,272,272,288,304,336,400,528,32,32,48,80,144,272,32,48,48,80,144,272,48,64,80,80,144,272,80,96,112,144,144,272,144,160,176,208,272,272,272,288,304,336,400,528,32,32,48,80,144,272,32,48,48,80,144,272,48,64,80,80,144,272,80,96,112,144,144,272,144,160,176,208,272,272,272,288,304,336,400,528}; + +int test_code_kernarg_alignment() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + exit; + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("kernarg_align.brig", &module)); + + // Get a list of agents, and iterate through the list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Skip if this agent does not support kernel dispatch + uint32_t feature = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS==status); + if (HSA_AGENT_FEATURE_KERNEL_DISPATCH != feature) { + continue; + } + + // Create a queue for dispatching + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL,UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find a global memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + status = hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if(status != HSA_STATUS_INFO_BREAK) { + continue; + } + ASSERT((uint64_t)-1 != global_region.handle); + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + 
hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + hsa_ext_control_directives_t control_directives; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate data to be used for the kernel arguments + uint64_t* output; + status = hsa_memory_allocate(global_region, sizeof(uint64_t) * DATA_SIZE, (void**) &output); + + uint32_t* args_32bit; + status = hsa_memory_allocate(global_region, sizeof(uint32_t) * 2, (void**) &args_32bit); + + uint64_t* args_64bit; + status = hsa_memory_allocate(global_region, sizeof(uint64_t) * 2, (void**) &args_64bit); + + args_32bit[0] = FIRST_ARG; + args_32bit[1] = SECOND_ARG; + args_64bit[0] = FIRST_ARG; + args_64bit[1] = SECOND_ARG; + + int jj; + for (jj = 0; jj < num_kernels; ++jj) { + // Reset the output data + memset(output, 0, DATA_SIZE * sizeof(uint32_t)); + + // Get the target symbol information + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, &kernarg_align_symbol_names[jj], &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify that the kernarg size if expected + ASSERT_MSG(kernarg_size[jj] == symbol_record.kernarg_segment_size, "The kernel argument size isn't what was expected for the %s kernel. 
%d != %d\n", kernarg_align_symbol_names[jj], kernarg_size[jj], symbol_record.kernarg_segment_size); + + // Allocate the kernel argument buffer from the correct region + char* kernarg_start; + status = hsa_memory_allocate(kernarg_region, symbol_record.kernarg_segment_size, (void**) &kernarg_start); + ASSERT(HSA_STATUS_SUCCESS == status); + memset(kernarg_start, 0, symbol_record.kernarg_segment_size); + + char* kernarg_ptr = kernarg_start; + + // Setup the kernel arguments + memcpy(kernarg_ptr, &output, sizeof(uint32_t*)); + + kernarg_ptr = kernarg_start + first_offset[jj]; + if(first_arg_size[jj] == 4) { + memcpy(kernarg_ptr, &args_32bit[0], sizeof(uint32_t)); + } else { + memcpy(kernarg_ptr, &args_64bit[0], sizeof(uint64_t)); + } + + kernarg_ptr = kernarg_start + second_offset[jj]; + if(first_arg_size[jj] == 4) { + memcpy(kernarg_ptr, &args_32bit[1], sizeof(uint32_t)); + } else { + memcpy(kernarg_ptr, &args_64bit[1], sizeof(uint64_t)); + } + + // Launch the vector_copy kernel + dispatch_kernel_1d_data(queue, DATA_SIZE, symbol_record.kernel_object, (void*) kernarg_start); + + // Verify the output data block is updated + int kk; + for(kk = 0; kk < DATA_SIZE; ++kk) { + ASSERT_MSG(EXPECTED_OUTPUT == output[kk], "Invalid value found in the %dth element : %d.\n",kk,output[kk]); + } + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_start); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Free output data and argument buffers + status = hsa_memory_free((void*) output); + status = hsa_memory_free((void*) args_32bit); + status = hsa_memory_free((void*) args_64bit); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS==status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free_agent_list(&agent_list); + + // Destroy 
the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/code/test_code_mixed_scope.c b/src/core/code/test_code_mixed_scope.c new file mode 100644 index 0000000..215d3e4 --- /dev/null +++ b/src/core/code/test_code_mixed_scope.c @@ -0,0 +1,290 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/**
+ * Test Name: test_code_mixed_scope
+ * Scope: Conformance
+ *
+ * Purpose: Ensures that symbols declared with module scope
+ * and program scope can be extracted from a code object
+ * and an executable and, if the symbols are kernels,
+ * dispatched to a user mode queue.
+ *
+ * Test Description:
+ * 1. Create a code object by loading the mixed_scope Brig module
+ * adding it to a program and finalizing it.
+ * 2. Use the hsa_code_object_get_symbol API and the kernels' module
+ * qualified and program scope symbol name to obtain the
+ * code object's associated symbol.
+ * 3. Create an executable for each agent that supports kernel dispatch
+ * using the code object.
+ * 4. Use the hsa_executable_get_symbol API and the kernels' module qualified
+ * and program scope symbol names to obtain the executable's associated symbol.
+ * 5. Execute the kernels on the agent.
+ *
+ * Expected Result: The kernels should execute correctly on each agent.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_code_mixed_scope() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Get the finalization function pointer table
+    hsa_ext_finalizer_pfn_t pfn;
+    status = get_finalization_fnc_tbl(&pfn);
+    // A failure here indicates that finalization isn't supported.
+    // The test succeeds in that case, but the runtime initialized
+    // above is still shut down so it is not left running.
+    if (HSA_STATUS_SUCCESS != status) {
+        status = hsa_shut_down();
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        return 0;
+    }
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("mixed_scope.brig", &module));
+
+    // Module names and symbol names
+    const int num_symbols = 2;
+    const char* symbol_names[2] = {
+        "&__vector_copy_kernel1",
+        "&__vector_copy_kernel2"
+    };
+    const char* module_name = "&mixed_scope";
+    // program_linkage[jj] selects how symbol_names[jj] is looked up:
+    // false -> module qualified name, true -> plain program scope name.
+    const bool program_linkage[2] = {false, true};
+
+    // Get a list of agents, and iterate through the list
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Skip if this agent does not support kernel dispatch. The
+        // feature value is tested as a bit mask so that an agent which
+        // reports additional features is not skipped.
+        uint32_t feature = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Create a queue for dispatching
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE,
+                                  NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        const uint32_t data_size = 256;
+
+        // Allocate data to be used as the input and output of the kernel
+        uint32_t* data_input = (uint32_t*)malloc(sizeof(uint32_t) * data_size);
+        ASSERT(NULL != data_input);
+        uint32_t* data_output = (uint32_t*)malloc(sizeof(uint32_t) * data_size);
+        ASSERT(NULL != data_output);
+
+        // Find a memory region that supports kernel arguments
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+
+        // Allocate the kernel argument buffer from the correct region
+        void* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region, 2 * sizeof(uint32_t*), &kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // The kernarg data structure
+        typedef struct __attribute__ ((aligned(16))) vector_copy_arg_s {
+            void* data_input;
+            void* data_output;
+        } vector_copy_arg_t;
+        vector_copy_arg_t args;
+        args.data_input = data_input;
+        args.data_output = data_output;
+
+        // Setup the kernarg buffer. Only the two pointers are copied;
+        // the copy size matches the allocation size above.
+        memcpy((void*)kernarg_buffer, &args, 2 * sizeof(uint32_t*));
+
+        // Get the ISA from this agent
+        hsa_isa_t agent_isa;
+        agent_isa.handle = (uint64_t)-1;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_ISA, &agent_isa);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        ASSERT((uint64_t)-1 != agent_isa.handle);
+
+        // Get machine model and profile to create a program
+        hsa_machine_model_t machine_model;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        hsa_profile_t profile;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        hsa_default_float_rounding_mode_t default_float_rounding_mode;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE,
+                                    &default_float_rounding_mode);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+        hsa_code_object_type_t code_object_type = HSA_CODE_OBJECT_TYPE_PROGRAM;
+        int32_t call_convention = 0;
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+        // Create the program
+        hsa_ext_program_t program;
+        memset(&program, 0, sizeof(hsa_ext_program_t));
+        status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Add the brig modules to the program
+        status = pfn.hsa_ext_program_add_module(program, module);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Finalize the program and extract the code object
+        status = pfn.hsa_ext_program_finalize(program, agent_isa, call_convention, control_directives,
+                                              "", code_object_type, &code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the empty executable
+        status = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Load the code object
+        status = hsa_executable_load_code_object(executable, agent_list.agents[ii], code_object, "");
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Freeze the executable; it can now be queried for symbols
+        status = hsa_executable_freeze(executable, "");
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        int jj;
+        for (jj = 0; jj < num_symbols; ++jj) {
+            // Query the symbol from the code_object
+            hsa_code_symbol_t code_symbol;
+            code_symbol.handle = (uint64_t)-1;
+            char code_object_symbol_name[256];
+            if (!program_linkage[jj]) {
+                // Must be in the "&module_name::&symbol_name" format
+                snprintf(code_object_symbol_name, sizeof(code_object_symbol_name),
+                         "%s::%s", module_name, symbol_names[jj]);
+            } else {
+                // Must be in "&symbol_name" format
+                snprintf(code_object_symbol_name, sizeof(code_object_symbol_name),
+                         "%s", symbol_names[jj]);
+            }
+            status = hsa_code_object_get_symbol(code_object, code_object_symbol_name, &code_symbol);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+
+            // Query the symbol from the executable. A module scope symbol
+            // is looked up with its module name, a program scope symbol
+            // with a NULL module name. Both kinds are looked up and
+            // dispatched, as the test description requires.
+            hsa_executable_symbol_t executable_symbol;
+            executable_symbol.handle = (uint64_t)-1;
+            const char* executable_module_name = NULL;
+            if (!program_linkage[jj]) {
+                executable_module_name = module_name;
+            }
+            status = hsa_executable_get_symbol(executable,
+                                               executable_module_name,
+                                               symbol_names[jj],
+                                               agent_list.agents[ii],
+                                               call_convention,
+                                               &executable_symbol);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+
+            // Dispatch the kernel
+            uint64_t kernel_object;
+            status = hsa_executable_symbol_get_info(executable_symbol,
+                                                    HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                                    &kernel_object);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            // Initialize the input data
+            uint32_t kk;
+            for (kk = 0; kk < data_size; ++kk) {
+                data_input[kk] = kk;
+            }
+            // Reset the output data so a result left by the previous
+            // kernel cannot satisfy the comparison below
+            memset(data_output, 0, data_size * sizeof(uint32_t));
+            // Launch the vector_copy kernel
+            dispatch_kernel_1d_data(queue, data_size, kernel_object, (void*)kernarg_buffer);
+
+            // Verify the output data block is updated
+            int cmp = memcmp(data_input, data_output, data_size * sizeof(uint32_t));
+            ASSERT(0 == cmp);
+        }
+
+        // Free the kernarg memory buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the input and output data
+        free(data_input);
+        free(data_output);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Releasing the program should not affect the executable
+        pfn.hsa_ext_program_destroy(program);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    free_agent_list(&agent_list);
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/code/test_code_module_scope_symbol.c b/src/core/code/test_code_module_scope_symbol.c
new file mode 100644
index 0000000..39e08c1
--- /dev/null
+++ b/src/core/code/test_code_module_scope_symbol.c
@@ -0,0 +1,277 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: test_code_module_scope_symbol + * Scope: Conformance + * + * Purpose: Ensures that a symbol declared with module scope + * can be extracted from a code object and an executable and, + * if the symbol is a kernel, dispatched to a user mode queue. + * + * Test Description: + * 1. Create a code object by loading the module_scope Brig module + * adding it to a program and finalizing it. + * 2. 
Use the hsa_code_object_get_symbol API and the kernel's module
+ * qualified symbol name to obtain the code object's associated symbol.
+ * 3. Create an executable for each agent that supports kernel dispatch
+ * using the code object.
+ * 4. Use the hsa_executable_get_symbol API and the kernel's module qualified
+ * symbol name to obtain the executable's associated symbol.
+ * 5. Execute the kernel on the agent.
+ *
+ * Expected Result: The kernel should execute correctly on each agent.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_code_module_scope_symbol() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Get the finalization function pointer table
+    hsa_ext_finalizer_pfn_t pfn;
+    status = get_finalization_fnc_tbl(&pfn);
+    // A failure here indicates that finalization isn't supported.
+    // The test succeeds in that case, but the runtime initialized
+    // above is still shut down so it is not left running.
+    if (HSA_STATUS_SUCCESS != status) {
+        status = hsa_shut_down();
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        return 0;
+    }
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("module_scope.brig", &module));
+
+    // Module name and symbol names
+    const char* module_name = "&module_scope";
+    const char* symbol_name = "&__vector_copy_kernel";
+
+    // Get a list of agents, and iterate through the list
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Skip if this agent does not support kernel dispatch. The
+        // feature value is tested as a bit mask so that an agent which
+        // reports additional features is not skipped.
+        uint32_t feature = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Create a queue for dispatching
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE,
+                                  NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        const uint32_t data_size = 256;
+
+        // Allocate data to be used as the input and output of the kernel
+        uint32_t* data_input = (uint32_t*)malloc(sizeof(uint32_t) * data_size);
+        ASSERT(NULL != data_input);
+        uint32_t* data_output = (uint32_t*)malloc(sizeof(uint32_t) * data_size);
+        ASSERT(NULL != data_output);
+
+        // Find a memory region that supports kernel arguments
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+        // Allocate the kernel argument buffer from the correct region
+        void* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region, 2 * sizeof(uint32_t*), &kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // The kernarg data structure
+        typedef struct __attribute__ ((aligned(16))) vector_copy_arg_s {
+            void* data_input;
+            void* data_output;
+        } vector_copy_arg_t;
+        vector_copy_arg_t args;
+        args.data_input = data_input;
+        args.data_output = data_output;
+
+        // Setup the kernarg buffer. Only the two pointers are copied;
+        // the copy size matches the allocation size above.
+        memcpy((void*)kernarg_buffer, &args, 2 * sizeof(uint32_t*));
+
+        // Get the ISA from this agent
+        hsa_isa_t agent_isa;
+        agent_isa.handle = (uint64_t)-1;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_ISA, &agent_isa);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        ASSERT((uint64_t)-1 != agent_isa.handle);
+
+        // Get machine model and profile to create a program
+        hsa_machine_model_t machine_model;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        hsa_profile_t profile;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        hsa_default_float_rounding_mode_t default_float_rounding_mode;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE,
+                                    &default_float_rounding_mode);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+        hsa_code_object_type_t code_object_type = HSA_CODE_OBJECT_TYPE_PROGRAM;
+        int32_t call_convention = 0;
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+        // Create the program
+        hsa_ext_program_t program;
+        memset(&program, 0, sizeof(hsa_ext_program_t));
+        status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Add the brig modules to the program
+        status = pfn.hsa_ext_program_add_module(program, module);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Finalize the program and extract the code object
+        status = pfn.hsa_ext_program_finalize(program, agent_isa, call_convention, control_directives,
+                                              "", code_object_type, &code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the empty executable
+        status = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Load the code object
+        status = hsa_executable_load_code_object(executable, agent_list.agents[ii], code_object, "");
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Freeze the executable; it can now be queried for symbols
+        status = hsa_executable_freeze(executable, "");
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Releasing the program should not affect the executable.
+        // It is destroyed here, before the symbol queries and the
+        // dispatch below, so those steps run without the program.
+        pfn.hsa_ext_program_destroy(program);
+
+        // Query the symbol from the code_object
+        hsa_code_symbol_t code_symbol;
+        code_symbol.handle = (uint64_t)-1;
+
+        // Must be in the "&module_name::&symbol_name" format
+        char code_object_symbol_name[256];
+        snprintf(code_object_symbol_name, sizeof(code_object_symbol_name),
+                 "%s::%s", module_name, symbol_name);
+        status = hsa_code_object_get_symbol(code_object, code_object_symbol_name, &code_symbol);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Query the symbol from the executable, passing the module
+        // name and the symbol name as separate arguments
+        hsa_executable_symbol_t executable_symbol;
+        executable_symbol.handle = (uint64_t)-1;
+        int32_t unused_call_convention = 0;
+        status = hsa_executable_get_symbol(executable,
+                                           module_name,
+                                           symbol_name,
+                                           agent_list.agents[ii],
+                                           unused_call_convention,
+                                           &executable_symbol);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Dispatch the kernel
+        uint64_t kernel_object;
+        status = hsa_executable_symbol_get_info(executable_symbol,
+                                                HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                                &kernel_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Initialize the input data
+        uint32_t kk;
+        for (kk = 0; kk < data_size; ++kk) {
+            data_input[kk] = kk;
+        }
+
+        // Reset the output data
+        memset(data_output, 0, data_size * sizeof(uint32_t));
+
+        // Launch the vector_copy kernel
+        dispatch_kernel_1d_data(queue, data_size, kernel_object, (void*)kernarg_buffer);
+
+        // Verify the output data block is updated
+        int cmp = memcmp(data_input, data_output, data_size * sizeof(uint32_t));
+        ASSERT(0 == cmp);
+
+        // Free the kernarg memory buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the input and output data
+        free(data_input);
+        free(data_output);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    free_agent_list(&agent_list);
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/code/test_code_multiple_executables.c b/src/core/code/test_code_multiple_executables.c
new file mode 100644
index 0000000..fc9d18f
--- /dev/null
+++ b/src/core/code/test_code_multiple_executables.c
@@ -0,0
+1,252 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/**
+ * Test Name: test_code_multiple_executables
+ * Scope: Conformance
+ *
+ * Purpose: Ensures that a single code object can be loaded
+ * into several distinct executables, allowing each executable
+ * to have its own allocation.
+ *
+ * Test Description:
+ * 1. Create a code object by finalizing the global_vector_copy Brig module.
+ * 2. For each agent that supports kernel dispatch, create
+ * several executables and load the code object into each.
+ * 3. For each executable define the global variable "&b" using the
+ * hsa_executable_global_variable_define API; a separate buffer is
+ * allocated for each executable.
+ * 4. Extract a kernel that uses the global variable.
+ * 5. Execute the kernel on the agent.
+ *
+ * Expected Result: The kernel should execute correctly on each agent.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_code_multiple_executables() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Get the finalization function pointer table
+    hsa_ext_finalizer_pfn_t pfn;
+    status = get_finalization_fnc_tbl(&pfn);
+    // A failure here indicates that finalization isn't supported.
+    // The test succeeds in that case, but the runtime initialized
+    // above is still shut down so it is not left running.
+    if (HSA_STATUS_SUCCESS != status) {
+        status = hsa_shut_down();
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        return 0;
+    }
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("global_vector_copy.brig", &module));
+
+    // Get a list of agents, and iterate through the list
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Skip if this agent does not support kernel dispatch. The
+        // feature value is tested as a bit mask so that an agent which
+        // reports additional features is not skipped.
+        uint32_t feature = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Get the ISA from this agent
+        hsa_isa_t agent_isa;
+        agent_isa.handle = (uint64_t)-1;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_ISA, &agent_isa);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        ASSERT((uint64_t)-1 != agent_isa.handle);
+
+        // Get machine model and profile to create a program
+        hsa_machine_model_t machine_model;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        hsa_profile_t profile;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        hsa_default_float_rounding_mode_t default_float_rounding_mode;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE,
+                                    &default_float_rounding_mode);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+        hsa_code_object_type_t code_object_type = HSA_CODE_OBJECT_TYPE_PROGRAM;
+        int32_t call_convention = 0;
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+        // Create the program
+        hsa_ext_program_t program;
+        memset(&program, 0, sizeof(hsa_ext_program_t));
+        status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Add the brig modules to the program
+        status = pfn.hsa_ext_program_add_module(program, module);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        // Finalize the program and extract the code object
+        status = pfn.hsa_ext_program_finalize(program, agent_isa, call_convention, control_directives,
+                                              "", code_object_type, &code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create a queue for dispatching
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE,
+                                  NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Repeat for multiple executables; the same code object is
+        // loaded into every one of them
+        const uint32_t num_executables = 8;
+        uint32_t kk;
+        for (kk = 0; kk < num_executables; ++kk) {
+            // Create the empty executable
+            status = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+
+            // Allocate the buffer that backs the global variable of
+            // this executable
+            const uint32_t data_size = 256;
+            uint32_t* global_data;
+            global_data = (uint32_t*)malloc(sizeof(uint32_t) * data_size);
+            ASSERT(NULL != global_data);
+
+            // Define this global variable. This is done before the code
+            // object is loaded into the executable.
+            status = hsa_executable_global_variable_define(executable, "&b", (void*)global_data);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+
+            // Load the code object
+            status = hsa_executable_load_code_object(executable, agent_list.agents[ii], code_object, "");
+            ASSERT(HSA_STATUS_SUCCESS == status);
+
+            // Freeze the executable; it can now be queried for symbols
+            status = hsa_executable_freeze(executable, "");
+            ASSERT(HSA_STATUS_SUCCESS == status);
+
+            // Allocate data to be used as the input of the kernel
+            uint32_t* data_input;
+            data_input = (uint32_t*)malloc(sizeof(uint32_t) * data_size);
+            ASSERT(NULL != data_input);
+            // Initialize the data
+            uint32_t jj;
+            for (jj = 0; jj < data_size; ++jj) {
+                data_input[jj] = jj;
+            }
+
+            // Find a memory region that supports kernel arguments
+            hsa_region_t kernarg_region;
+            kernarg_region.handle = (uint64_t)-1;
+            hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+            ASSERT((uint64_t)-1 != kernarg_region.handle);
+            // Allocate the kernel argument buffer from the correct region
+            void* kernarg_buffer = NULL;
+            status = hsa_memory_allocate(kernarg_region, sizeof(uint32_t*), &kernarg_buffer);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            // Setup the kernarg buffer; the only argument is the
+            // input pointer
+            memcpy((void*)kernarg_buffer, &data_input, sizeof(uint32_t*));
+
+            // Get the symbol and the symbol info
+            symbol_record_t symbol_record;
+            memset(&symbol_record, 0, sizeof(symbol_record_t));
+            char* symbol_names[1];
+            symbol_names[0] = "&__global_vector_copy_kernel";
+            status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+
+            // Launch the vector_copy kernel
+            dispatch_kernel_1d_data(queue, data_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+            // Verify the global variable's buffer now matches the input
+            int cmp_results = memcmp(data_input, global_data, sizeof(uint32_t) * data_size);
+            ASSERT(0 == cmp_results);
+
+            // Free the kernarg memory buffer
+            status = hsa_memory_free(kernarg_buffer);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+
+            // Free the global data
+            free(global_data);
+
+            // Free the input data
+            free(data_input);
+
+            // Destroy the executable
+            status = hsa_executable_destroy(executable);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+        }
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Releasing the program should not affect the executable
+        pfn.hsa_ext_program_destroy(program);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    free_agent_list(&agent_list);
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git
a/src/core/code/test_code_program_scope_symbol.c b/src/core/code/test_code_program_scope_symbol.c new file mode 100644 index 0000000..d94b4c2 --- /dev/null +++ b/src/core/code/test_code_program_scope_symbol.c @@ -0,0 +1,274 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: test_code_program_scope_symbol + * Scope: Conformance + * + * Purpose: Ensures that a symbol declared with program scope + * can be extracted from a code object and an executable and, + * if the symbol is a kernel, dispatched to a user mode queue. + * + * Test Description: + * 1. Create a code object by loading the program_scope Brig module + * adding it to a program and finalizing it. + * 2. Use the hsa_code_object_get_symbol API and the kernel's + * symbol name to obtain the code object's associated symbol. + * 3. Create an executable for each agent that supports kernel dispatch + * using the code object. + * 4. Use the hsa_executable_get_symbol API and the kernel's + * symbol name to obtain the executable's associated symbol. + * 5. Execute the kernel on the agent. + * + * Expected Result: The kernel should execute correctly on each agent. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_code_program_scope_symbol() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("program_scope.brig", &module)); + + // Module name and symbol names + const char* module_name = "&module3"; + const char* symbol_name = "&__vector_copy_kernel"; + + // Get a list of agents, and iterate throught the list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Skip if this agent does not support kernel dispatch + uint32_t feature = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS==status); + if (HSA_AGENT_FEATURE_KERNEL_DISPATCH != feature) { + continue; + } + + // Create a queue for dispatching + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL,UINT32_MAX,UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + const uint32_t data_size = 256; + + // Allocate data to be used as the input and output of the kernel + uint32_t* data_input = (uint32_t*)malloc(sizeof(uint32_t) * data_size); + uint32_t* data_output = (uint32_t*)malloc(sizeof(uint32_t) * data_size); + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, 2 * sizeof(uint32_t*), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // The kernarg data structure + typedef struct __attribute__ ((aligned(16))) vector_copy_arg_s { + void* data_input; + void* data_output; + } vector_copy_arg_t; + vector_copy_arg_t args; + args.data_input = data_input; + args.data_output = 
data_output; + + // Setup the kernarg buffer + memcpy((void*)kernarg_buffer, &args, 2 * sizeof(uint32_t*)); + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Get machine model and profile to create a program + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_code_object_t code_object; + hsa_executable_t executable; + hsa_code_object_type_t code_object_type = HSA_CODE_OBJECT_TYPE_PROGRAM; + int32_t call_convention = 0; + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + // Create the program + hsa_ext_program_t program; + memset(&program,0,sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + ASSERT(HSA_STATUS_SUCCESS == status); + // Add the brig modules to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + // Finalize the program and extract the code object + status = pfn.hsa_ext_program_finalize(program, agent_isa, call_convention, control_directives, "", code_object_type, &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the empty executable + status = hsa_executable_create(profile, 
HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable); + ASSERT(HSA_STATUS_SUCCESS == status); + //Load the code object + status = hsa_executable_load_code_object(executable, agent_list.agents[ii], code_object, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + //Freeze the executable; it can now be queried for symbols + status = hsa_executable_freeze(executable, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Releasing the program should not affect the executable + pfn.hsa_ext_program_destroy(program); + + + //Query the symbol from the code_object + hsa_code_symbol_t code_symbol; + code_symbol.handle = (uint64_t)-1; + // Must be in the "&symbol_name" format + char code_object_symbol_name[256]; + sprintf(code_object_symbol_name, "%s", symbol_name); + status = hsa_code_object_get_symbol(code_object, code_object_symbol_name, &code_symbol); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Query the symbol from the executable + hsa_executable_symbol_t executable_symbol; + executable_symbol.handle = (uint64_t)-1; + int32_t unused_call_convention = 0; + const char* executable_module_name = NULL; + char executable_symbol_name[256]; + //Must be in the "&symbol_name" format + sprintf(executable_symbol_name, "%s", symbol_name); + status = hsa_executable_get_symbol(executable, + executable_module_name, + executable_symbol_name, + agent_list.agents[ii], + unused_call_convention, + &executable_symbol); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Dispatch the kernel + uint64_t kernel_object; + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel_object); + ASSERT(HSA_STATUS_SUCCESS == status); + // Initialize the input data + uint32_t kk; + for (kk = 0; kk < data_size; ++kk) { + data_input[kk] = kk; + } + // Reset the output data + memset(data_output, 0, data_size * sizeof(uint32_t)); + // Launch the vector_copy kernel + dispatch_kernel_1d_data(queue, data_size, kernel_object, (void*)kernarg_buffer); + + // Verify the output 
data block is updated + int cmp = memcmp(data_input, data_output, data_size * sizeof(uint32_t)); + ASSERT(0 == cmp); + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the input and output data + free(data_input); + free(data_output); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS==status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free_agent_list(&agent_list); + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/code/test_code_recursive_kernel_function.c b/src/core/code/test_code_recursive_kernel_function.c new file mode 100644 index 0000000..19bb60e --- /dev/null +++ b/src/core/code/test_code_recursive_kernel_function.c @@ -0,0 +1,207 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +#define BLOCK_SIZE 1024 +#define RECURSE_COUNT 16 + +int test_code_recursive_kernel_function() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find an agent that supports kernel dispatch + hsa_agent_t agent; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + if(status == HSA_STATUS_INFO_BREAK) { status = HSA_STATUS_SUCCESS; } + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a queue for dispatch + hsa_queue_t* queue; + status = hsa_queue_create(agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find a memory region in the global segment + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(agent, get_global_memory_region_fine_grained, &global_region); + ASSERT((uint64_t)-1 != global_region.handle); + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent, get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Load the BRIG module + hsa_ext_module_t module; + load_module_from_file("recursive_func.brig", &module); + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = finalize_executable(agent, + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the executable symbols for dispatch + char* symbol_name = "&__recursive_func_kernel"; + symbol_record_t 
symbol_record; + status = get_executable_symbols(executable, + agent, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + 1, + &symbol_name, + &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Define the argument structure + struct args_t { + uint32_t* out; + uint32_t in0; + uint32_t in1; + } args; + + // Allocate and initialize the kernel arguments and data. + args.in0 = 0; + args.in1 = RECURSE_COUNT; + + status = hsa_memory_allocate(global_region, BLOCK_SIZE * sizeof(uint32_t), (void**) &(args.out)); + ASSERT(HSA_STATUS_SUCCESS == status); + memset(args.out, 0, BLOCK_SIZE * sizeof(uint32_t)); + + // Allocate the kernel argument buffer from the correct region. + char* kernarg_buffer; + status = hsa_memory_allocate(kernarg_region, symbol_record.kernarg_segment_size, (void**) &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + memcpy(kernarg_buffer, &args, symbol_record.kernarg_segment_size); + + // Create a completion signal + hsa_signal_t completion_signal; + status=hsa_signal_create(1, 0, NULL, &completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the dispatch packet. 
+ hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 256; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = BLOCK_SIZE; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernarg_address = (void*) kernarg_buffer; + dispatch_packet.completion_signal = completion_signal; + + // Dispatch the kernel + enqueue_dispatch_packet(queue, &dispatch_packet); + + // Wait on the completion signal + hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Validate the data in the output buffer + int ii; + for(ii=0;ii, + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * Test Name: test_code_serialize_deserialize + * Scope: Conformance + * + * Purpose: Serialize a code object to a binary blob, and then + * deserialize the blob back into a code object. + * + * Test Description: + * 1. Create a code object by loading the no_op Brig module. + * 2. For each agent that supports kernel dispatch, perform the followings. + * 3. Finalizing the module with the profile and machine model match to the + * current agent. + * 4. Serialize the code object, using valid callbacks and parameters to + * perform the serialization. + * 5. Extract and compare the symbols from code objects and Executable before and after serialize-deserialize for their correctness in various attributes + * 6. Destroy the original code object. + * 7. Deserialize the serialized data to create a new code object. + * 8. + * a) Create an executable and load the code object. + * b) Extract the no_op kernel from the executable. + * c) Dispatch the executable on the agent. + * 9. Destroy the Brig module. + * + * Expected Result: The final code object should be usable + * and the associated symbols should be dispatchable on + * all agents.And all the code object and excutable symbols should be matching after serialize-deserialize as well + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_code_serialize_deserialize() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. 
The test will succeed in that + // case. + if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + // Get a list of agents, and iterate throught the list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Skip if this agent does not support kernel dispatch + uint32_t feature = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS == status); + if (HSA_AGENT_FEATURE_KERNEL_DISPATCH != feature) { + continue; + } + + // Get the ISA from this agent + hsa_isa_t agent_isa; + agent_isa.handle = (uint64_t)-1; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_ISA, &agent_isa); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != agent_isa.handle); + + // Get machine model and profile to create a program + hsa_machine_model_t machine_model; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + ASSERT(HSA_STATUS_SUCCESS == status); + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the program + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the brig modules to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the program and extract the 
code object + hsa_code_object_t code_object; + memset(&code_object, 0, sizeof(hsa_code_object_t)); + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + status = pfn.hsa_ext_program_finalize(program, agent_isa, 0, control_directives, NULL, HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the empty executable + hsa_executable_t executable; + status = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the code object + status = hsa_executable_load_code_object(executable, agent_list.agents[ii], code_object, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Freeze the executable; it can now be queried for symbols + status = hsa_executable_freeze(executable, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info from code object and executable before serialzation + const int num_symbols = 1; + char* symbol_names[1], name_1[1]; + char* code_object_symbol_name[1]; + symbol_names[0] = "&__no_op_kernel"; + code_object_symbol_name[0] = "&__no_op_kernel"; + int jj; + struct symbol_attribute_info_s { + const char name[1]; + uint32_t name_length; + uint32_t module_name_length; + char module_name[1]; + const bool is_definition; + uint32_t variable_size; + const bool is_variable_const; + uint32_t kernel_kernarg_seg_size; + uint32_t kernel_group_seg_size; + uint32_t kernel_private_seg_size; + }; + + uint64_t kernel_object_1; + const bool program_linkage[2] = {false, true}; + struct symbol_attribute_info_s *symbol_attribute_info[num_symbols]; + struct symbol_attribute_info_s *executable_symbol_attribute_info[num_symbols]; + for (jj = 0; jj < num_symbols; ++jj) { + // Query the symbol from the code_object + hsa_code_symbol_t code_symbol; + symbol_attribute_info[jj] = (struct symbol_attribute_info_s *)malloc(sizeof(struct symbol_attribute_info_s)); + 
executable_symbol_attribute_info[jj] = (struct symbol_attribute_info_s *)malloc(sizeof(struct symbol_attribute_info_s)); + code_symbol.handle = (uint64_t)-1; + status = hsa_code_object_get_symbol(code_object, symbol_names[jj], &code_symbol); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Query the symbol info from code_object + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_NAME, (void *)symbol_attribute_info[jj]->name); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_NAME_LENGTH, (void*) &(symbol_attribute_info[jj]->name_length)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH, (void*) &(symbol_attribute_info[jj]->module_name_length)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_MODULE_NAME, (void*) &(symbol_attribute_info[jj]->module_name)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_IS_DEFINITION, (void*) &(symbol_attribute_info[jj]->is_definition)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, (void*) &(symbol_attribute_info[jj]->kernel_kernarg_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, (void*) &(symbol_attribute_info[jj]->kernel_group_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, (void*) &(symbol_attribute_info[jj]->kernel_private_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + // Query the symbol from the executable + hsa_executable_symbol_t executable_symbol; + executable_symbol.handle = (uint64_t)-1; + int32_t call_convention = 0; + const char* 
executable_module_name = NULL; + status = hsa_executable_get_symbol(executable, + executable_module_name, + symbol_names[jj], + agent_list.agents[ii], + call_convention, + &executable_symbol); + ASSERT(HSA_STATUS_SUCCESS == status); + // Query the symbol info from the executable + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, (void*) executable_symbol_attribute_info[jj]->name); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, (void*) &(executable_symbol_attribute_info[jj]->name_length)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH, (void*) &(executable_symbol_attribute_info[jj]->module_name_length)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME, (void*) &(executable_symbol_attribute_info[jj]->module_name)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION, (void*) &(executable_symbol_attribute_info[jj]->is_definition)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, (void*) &(executable_symbol_attribute_info[jj]->kernel_kernarg_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, (void*) &(executable_symbol_attribute_info[jj]->kernel_group_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, (void*) &(executable_symbol_attribute_info[jj]->kernel_private_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + + } 
+ + // Serialize the code object + void* serialized_blob; + size_t serialized_size; + hsa_callback_data_t callback_data; + callback_data.handle = (uint64_t)-1; + status = hsa_code_object_serialize(code_object, callback_serialize_alloc, + callback_data, NULL, &serialized_blob, &serialized_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Deserialize the memory blob + hsa_code_object_t deserialized_code_object; + status = hsa_code_object_deserialize(serialized_blob, + serialized_size, NULL, &deserialized_code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the memory blob + free(serialized_blob); + // Free the original code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the empty executable + hsa_executable_t executable_1; + status = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable_1); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the deserialized code object + status = hsa_executable_load_code_object(executable_1, agent_list.agents[ii], deserialized_code_object, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Freeze the executable; it can now be queried for symbols + status = hsa_executable_freeze(executable_1, ""); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + status = get_executable_symbols(executable_1, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Extract the symbols info for comaprision on serialize-deserialized code object + struct symbol_attribute_info_s *symbol_attribute_info_after[num_symbols]; + struct symbol_attribute_info_s *executable_symbol_attribute_info_after[num_symbols]; + for (jj = 0; jj < num_symbols; ++jj) { + symbol_attribute_info_after[jj] = (struct symbol_attribute_info_s *)malloc(sizeof(struct symbol_attribute_info_s)); + 
executable_symbol_attribute_info_after[jj] = (struct symbol_attribute_info_s *)malloc(sizeof(struct symbol_attribute_info_s)); + // Query the symbol from the deserialized code_object + hsa_code_symbol_t code_symbol; + code_symbol.handle = (uint64_t)-1; + status = hsa_code_object_get_symbol(deserialized_code_object, symbol_names[jj], &code_symbol); + ASSERT(HSA_STATUS_SUCCESS == status); + // Query the symbol info on deserialized code object + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_NAME, (void*) &(symbol_attribute_info_after[jj]->name)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_NAME_LENGTH, (void*) &(symbol_attribute_info_after[jj]->name_length)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH, (void*) &(symbol_attribute_info_after[jj]->module_name_length)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_MODULE_NAME, (void*) &(symbol_attribute_info_after[jj]->module_name)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_IS_DEFINITION, (void*) &(symbol_attribute_info_after[jj]->is_definition)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, (void*) &(symbol_attribute_info_after[jj]->kernel_kernarg_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, (void*) &(symbol_attribute_info_after[jj]->kernel_group_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_symbol_get_info(code_symbol, HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, (void*) &(symbol_attribute_info_after[jj]->kernel_private_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Query the 
symbol from the deserialized code object executable + hsa_executable_symbol_t executable_symbol; + executable_symbol.handle = (uint64_t)-1; + int32_t call_convention = 0; + const char* executable_module_name = NULL; + status = hsa_executable_get_symbol(executable_1, + executable_module_name, + symbol_names[jj], + agent_list.agents[ii], + call_convention, + &executable_symbol); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, (void*) &(executable_symbol_attribute_info_after[jj]->name)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, (void*) &(executable_symbol_attribute_info_after[jj]->name_length)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH, (void*) &(executable_symbol_attribute_info_after[jj]->module_name_length)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME, (void*) &(executable_symbol_attribute_info_after[jj]->module_name)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION, (void*) &(executable_symbol_attribute_info_after[jj]->is_definition)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, (void*) &(executable_symbol_attribute_info_after[jj]->kernel_kernarg_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, (void*) &(executable_symbol_attribute_info_after[jj]->kernel_group_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = 
hsa_executable_symbol_get_info(executable_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, (void*) &(executable_symbol_attribute_info_after[jj]->kernel_private_seg_size)); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Compare the symbols info before and after serialization/deserialization on both code object and executable + for (jj = 0; jj < num_symbols; ++jj) { + // Compare Code object symbol attribute values + if ((strcmp(symbol_attribute_info[jj]->name, symbol_attribute_info_after[jj]->name) == 0) && + (strcmp(symbol_attribute_info[jj]->module_name, symbol_attribute_info_after[jj]->module_name) == 0) && + (symbol_attribute_info[jj]->name_length == symbol_attribute_info_after[jj]->name_length) && + (symbol_attribute_info[jj]->module_name_length == symbol_attribute_info_after[jj]->module_name_length) && + (symbol_attribute_info[jj]->is_definition == symbol_attribute_info_after[jj]->is_definition)&& + (symbol_attribute_info[jj]->kernel_kernarg_seg_size == symbol_attribute_info_after[jj]->kernel_kernarg_seg_size) && + (symbol_attribute_info[jj]->kernel_group_seg_size == symbol_attribute_info_after[jj]->kernel_group_seg_size) && + (symbol_attribute_info[jj]->kernel_private_seg_size == symbol_attribute_info_after[jj]->kernel_private_seg_size)) { + // The comparisons passed. 
+ } else { + ASSERT_MSG(0,"Not all of the symbol attributes generated from the deserialized code object matched the original."); + } + + // Compare executable symbol attribute values + if ((strcmp(executable_symbol_attribute_info[jj]->name, executable_symbol_attribute_info_after[jj]->name) == 0) && + (strcmp(executable_symbol_attribute_info[jj]->module_name, executable_symbol_attribute_info_after[jj]->module_name) == 0) && + (executable_symbol_attribute_info[jj]->name_length == executable_symbol_attribute_info_after[jj]->name_length) && + (executable_symbol_attribute_info[jj]->module_name_length == executable_symbol_attribute_info_after[jj]->module_name_length) && + (executable_symbol_attribute_info[jj]->is_definition == executable_symbol_attribute_info_after[jj]->is_definition)&& + (executable_symbol_attribute_info[jj]->kernel_kernarg_seg_size == executable_symbol_attribute_info_after[jj]->kernel_kernarg_seg_size) && + (executable_symbol_attribute_info[jj]->kernel_group_seg_size == executable_symbol_attribute_info_after[jj]->kernel_group_seg_size) && + (executable_symbol_attribute_info[jj]->kernel_private_seg_size == executable_symbol_attribute_info_after[jj]->kernel_private_seg_size) ) { + } else { + ASSERT_MSG(0,"Not all of the symbol attributes generated from the deserialized executable matched the original."); + } + } + + // Use deserialized code object to dispatch a kernel + // Queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Signal + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + + // Fill info for the default dispatch_packet + memset(&dispatch_packet, 0, packet_size); + dispatch_packet.header |= 
HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 256; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 256; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.kernarg_address = 0; + dispatch_packet.completion_signal = signal; + + // Enqueue dispatch packets + hsa_kernel_dispatch_packet_t* queue_packet; + // Increment the write index of the queue + uint64_t write_index = hsa_queue_add_write_index_relaxed(queue, 1); + // Obtain the address of the queue packet entry + queue_packet = (hsa_kernel_dispatch_packet_t*)(queue->base_address + write_index * packet_size); + // Copy the initialized packet to the queue packet entry + memcpy(queue_packet, &dispatch_packet, packet_size); + // Set the queue packet entries header.type value to HSA_PACKET_TYPE_KERNEL_DISPATCH + // This allows the command processor to process this packet. 
+ queue_packet->header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + // Ring the doorbell + hsa_signal_store_relaxed(queue->doorbell_signal, write_index); + + // Wait until all dispatch packets finish executing + hsa_signal_value_t value = hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + ASSERT(0 == value); + + // Destroy signal + status = hsa_signal_destroy(signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable, program, and the deserialized code object + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_executable_destroy(executable_1); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(deserialized_code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + for(jj = 0; jj < num_symbols; ++jj) { + free(symbol_attribute_info[jj]); + free(executable_symbol_attribute_info[jj]); + free(symbol_attribute_info_after[jj]); + free(executable_symbol_attribute_info_after[jj]); + } + } + + free_agent_list(&agent_list); + + destroy_module(module); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/code/test_helper_func.c b/src/core/code/test_helper_func.c new file mode 100644 index 0000000..16ebdb2 --- /dev/null +++ b/src/core/code/test_helper_func.c @@ -0,0 +1,56 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +hsa_status_t callback_serialize_alloc(size_t size, hsa_callback_data_t data, void** address) { + *address = malloc(size); + return HSA_STATUS_SUCCESS; +} diff --git a/src/core/code/test_helper_func.h b/src/core/code/test_helper_func.h new file mode 100644 index 0000000..9ea1758 --- /dev/null +++ b/src/core/code/test_helper_func.h @@ -0,0 +1,53 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#ifndef _TEST_HELPER_FUNC_H_ +#define _TEST_HELPER_FUNC_H_ + +#include + +hsa_status_t callback_serialize_alloc(size_t size, hsa_callback_data_t data, void** address); + +#endif // _TEST_HELPER_FUNC_H_ diff --git a/src/core/init/hsa_init.c b/src/core/init/hsa_init.c new file mode 100644 index 0000000..7dd47c6 --- /dev/null +++ b/src/core/init/hsa_init.c @@ -0,0 +1,64 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include
+#include "hsa_init.h"
+
+/*
+ * One DEFINE_TEST per init/shutdown test; the names match the test_<name>
+ * functions declared in hsa_init.h.
+ * NOTE(review): DEFINE_TEST, INITIALIZE_TESTSUITE, ADD_TEST and RUN_TESTS are
+ * macros defined outside this file; presumably they wrap the Check framework
+ * named in the README - confirm.
+ */
+DEFINE_TEST(concurrent_init);
+DEFINE_TEST(concurrent_shutdown);
+DEFINE_TEST(concurrent_init_shutdown);
+DEFINE_TEST(refcount);
+DEFINE_TEST(reinitialize);
+
+/*
+ * Entry point of the init test program: sets up the suite, adds the five
+ * tests defined above in the order listed, and runs them. argc and argv are
+ * not referenced directly in this function. Returns 0 after RUN_TESTS().
+ */
+int main(int argc, char* argv[]) {
+    INITIALIZE_TESTSUITE();
+    ADD_TEST(concurrent_init);
+    ADD_TEST(concurrent_shutdown);
+    ADD_TEST(concurrent_init_shutdown);
+    ADD_TEST(refcount);
+    ADD_TEST(reinitialize);
+    RUN_TESTS();
+    return 0;
+}
diff --git a/src/core/init/hsa_init.h b/src/core/init/hsa_init.h
new file mode 100644
index 0000000..3807d8b
--- /dev/null
+++ b/src/core/init/hsa_init.h
@@ -0,0 +1,53 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#ifndef _HSA_INIT_H_ +#define _HSA_INIT_H_ +extern int test_concurrent_init(); +extern int test_concurrent_shutdown(); +extern int test_concurrent_init_shutdown(); +extern int test_refcount(); +extern int test_reinitialize(); +#endif // _HSA_INIT_H_ diff --git a/src/core/init/test_concurrent_init.c b/src/core/init/test_concurrent_init.c new file mode 100644 index 0000000..81d7d9f --- /dev/null +++ b/src/core/init/test_concurrent_init.c @@ -0,0 +1,117 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: concurrent_init + * Scope: Conformance + * + * Purpose: Verifies that hsa_init is thread safe with respect + * to itself + * + * Test Description: + * 1) Create N threads, and from each thread + * a) Call hsa_init to initialize the HSA runtime + * 2) Join all the threads to the main thread. + * 3) Shutdown the runtime by calling hsa_shutdown N times. + * + * Expected Results: Verify that the query operation executes successfully + * and that no undefined behavior occurs. 
+ * + */ + +#include +#include +#include +#include + +#define NUM_TESTS 100 + +void test_hsa_init_func(void *data) { + hsa_status_t status; + const char *err_str; + + // Initialize hsa runtime + status = hsa_init(); + hsa_status_string(status, &err_str); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "\nErr_code: %d Err_Str: %s\n", status, err_str); + return; +} + + +int test_concurrent_init() { + hsa_status_t status; + const char *err_str; + + // Create a test group + struct test_group * tg_concurr_init = test_group_create(NUM_TESTS); + + // Add test into the test group, each test has same test function - wrapper_hsa_init + test_group_add(tg_concurr_init, &test_hsa_init_func, NULL, NUM_TESTS); + + // Create threads for each test + test_group_thread_create(tg_concurr_init); + + // Start to run tests + test_group_start(tg_concurr_init); + + // Wait all tests finish + test_group_wait(tg_concurr_init); + + // Exit all tests + test_group_exit(tg_concurr_init); + + // Shutdown hsa runtime num_tests times + int ii; + for (ii = 0; ii < NUM_TESTS; ++ii) { + status = hsa_shut_down(); + hsa_status_string(status, &err_str); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "\nErr_code: %d Err_Str: %s\n", status, err_str); + } + + // Destroy tests, cleanup resources + test_group_destroy(tg_concurr_init); + + return 0; +} diff --git a/src/core/init/test_concurrent_init_shutdown.c b/src/core/init/test_concurrent_init_shutdown.c new file mode 100644 index 0000000..91deb60 --- /dev/null +++ b/src/core/init/test_concurrent_init_shutdown.c @@ -0,0 +1,117 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: concurrent_init_shutdown + * Scope: Conformance + * + * Purpose: Verifies that hsa_init and hsa_shutdown are thread safe + * + * Test Description: + * 1) Create several threads, and from each thread + * a) Call hsa_init to initialize the HSA runtime + * b) Query the agent list using hsa_iterate_agents + * c) Call hsa_shutdown + * 2) Repeat parallel execution of the threads several times. + * + * + * Expected Results: All of the threads should be able to initialize + * the runtime and query the agent list successfully. + */ + +#include +#include +#include + +#define NUM_TESTS 20 +#define NUM_ITER 100 + +void test_hsa_init_shutdown_func(void *data) { + hsa_status_t status; + + // Initialize hsa runtime + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Shutdown hsa runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return; +} + +int test_concurrent_init_shutdown() { + // Number of tests + int num_tests = NUM_TESTS; + + // Number of iterations + int num_iter = NUM_ITER; + + // Create a test group + struct test_group *tg_concurr_init = test_group_create(num_tests); + + // Add tests into the test group, each test has same test function - + // test_hsa_init_shutdown_func + test_group_add(tg_concurr_init, &test_hsa_init_shutdown_func, NULL, NUM_TESTS); + + // Create threads for each test + test_group_thread_create(tg_concurr_init); + + int ii; + for (ii = 0; ii < num_iter; ++ii) { + // Start to run tests + test_group_start(tg_concurr_init); + + // Wait all tests finish + test_group_wait(tg_concurr_init); + } + + // Exit all tests + test_group_exit(tg_concurr_init); + + // Destroy tests, cleanup resources + test_group_destroy(tg_concurr_init); + + return 0; +} diff --git a/src/core/init/test_concurrent_shutdown.c b/src/core/init/test_concurrent_shutdown.c new file mode 100644 index 0000000..73eb060 --- /dev/null +++ b/src/core/init/test_concurrent_shutdown.c @@ -0,0 +1,112 @@ +/* + * 
============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: concurrent_shutdown + * Scope: Conformance + * + * Purpose: Verifies that hsa_shutdown is thread safe with respect + * to itself + * + * Test Description: + * 1) Call hsa_init N times in the main thread. + * 2) Query the agent list using hsa_iterate_agents. + * 3) Create N threads, and from each thread + * b) Call hsa_shutdown. + * 4) Join all the threads to the main thread. + * 5) Repeat this several times. + * + * Expected Results: Verify that all the query operation executes + * successfully and that no undefined behavior occurs. + * + */ + +#include +#include +#include + +#define NUM_TESTS 1000 +#define NUM_ITER 5 + +void test_hsa_shutdown_func(void *data) { + hsa_status_t status; + const char *err_str; + + // Shutdown hsa runtime + status = hsa_shut_down(); + hsa_status_string(status, &err_str); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "\nErr_code: %d Err_Str: %s\n", status, err_str); + return; +} + +int test_concurrent_shutdown() { + hsa_status_t status; + + // Create a test group + struct test_group *tg_concurr_shutdown = test_group_create(NUM_TESTS); + + // Add tests into the test group, each test has wrapper_hsa_shutdown + test_group_add(tg_concurr_shutdown, &test_hsa_shutdown_func, NULL, NUM_TESTS); + + // Create threads for each test + test_group_thread_create(tg_concurr_shutdown); + + int ii, jj; + for (jj = 0; jj < NUM_ITER; ++jj) { + // Initialize hsa runtime num_tests times + for (ii = 0; ii < NUM_TESTS; ++ii) { + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS==status); + } + + // Start tests - concurrently shutdown runtime + test_group_start(tg_concurr_shutdown); + test_group_wait(tg_concurr_shutdown); + } + + test_group_exit(tg_concurr_shutdown); + test_group_destroy(tg_concurr_shutdown); + return 0; +} diff --git a/src/core/init/test_refcount.c b/src/core/init/test_refcount.c new file mode 100644 index 0000000..7eaeaed --- /dev/null +++ b/src/core/init/test_refcount.c @@ -0,0 +1,90 @@ +/* + * 
============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: refcount + * Scope: Conformance + * + * Purpose: Verifies that the hsa_init and hsa_shutdown APIs properly increment + * and decrement reference counting. + * + * Test Description: + * 1) Initialize the HSA runtime with hsa_init by calling that API N times, (N + * should be large). + * 2) Verify that the runtime is operational by querying the agent list. + * 3) Call hsa_shutdown N-1 times. + * 4) Again, verify the runtime is operational by querying the agent list. + * + * Expected Results: The runtime should remain operational when the reference + * count is positive. Repeated calls to hsa_init should not cause undefined behavior. + * + */ + +#include +#include + +#define N 1000 + +int test_refcount() { + hsa_status_t status; + + // Initialize hsa runtime N times + int ii; + for (ii = 0; ii < N; ++ii) { + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // Shutdown hsa runtime N - 1 times + for (ii = 0; ii < N-1; ++ii) { + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/init/test_reinitialize.c b/src/core/init/test_reinitialize.c new file mode 100644 index 0000000..44d9ef9 --- /dev/null +++ b/src/core/init/test_reinitialize.c @@ -0,0 +1,215 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: reinitialize + * + * Purpose: Verifies that the runtime can be initialized, closed and then + * reinitialized in a single process, and that no system resources + * are lost during this process. + * + * Test Description: + * 1) Initialize the runtime by calling hsa_init. + * 2) Create a queue. + * 3) Enqueue several simple kernels. + * 4) Shutdown the runtime using hsa_shutdown. + * 5) Repeat this several (thousand) times. 
+ *
+ * Expected Results: The runtime should be properly reinitialized every time, and
+ * all resources should be functional after every reinitialization.
+ */
+
+#include
+#include
+#include
+#include
+
+#define NUM_ITER 128
+// The NUM_KERNELS must be a power of 2; the
+// queue size depends on this value.
+#define NUM_KERNELS 16
+
+// Agent-iteration callback: stops at the first agent whose feature mask has
+// HSA_AGENT_FEATURE_KERNEL_DISPATCH set and copies that agent into the
+// hsa_agent_t that data points to. Agents without the bit are skipped.
+hsa_status_t get_dispatch_agent(hsa_agent_t agent, void* data) {
+    uint32_t feature_mask = 0;
+    hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &feature_mask);
+
+    if (0 == (feature_mask & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+        // Not dispatch-capable: keep iterating
+        return HSA_STATUS_SUCCESS;
+    }
+
+    // Hand the agent back through the user-provided buffer and stop
+    *((hsa_agent_t*) data) = agent;
+    return HSA_STATUS_INFO_BREAK;
+}
+
+int test_reinitialize() {
+    hsa_status_t status;
+
+    // The number of times to repeat the test
+    int ii;
+    for (ii = 0; ii < NUM_ITER; ++ii) {
+        // Initialize runtime
+        status = hsa_init();
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        hsa_agent_t agent;
+        // Get dispatchable agent
+        hsa_iterate_agents(get_dispatch_agent, &agent);
+        ASSERT(agent.handle != 0);
+
+        // Load the BRIG module
+        hsa_ext_module_t module;
+        ASSERT(0 == load_module_from_file("no_op.brig", &module));
+
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+        // Finalize the executable
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent,
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        char* symbol_names[1];
+        symbol_names[0] = "&__no_op_kernel";
+        status =
get_executable_symbols(executable, agent, 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a queue + hsa_queue_t *queue; + status = hsa_queue_create(agent, (size_t) (2 * NUM_KERNELS), HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate and initialize a signal array + hsa_signal_t signals[NUM_KERNELS]; + + int jj; + for (jj = 0; jj < NUM_KERNELS; ++jj) { + status = hsa_signal_create(1, 0, NULL, &signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Fill in info for the default dispatch_packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 256; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 256; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.kernarg_address = 0; + dispatch_packet.completion_signal.handle = 0; + + // Enqueue dispatch packets + hsa_kernel_dispatch_packet_t* queue_packet; + for (jj = 0; jj < NUM_KERNELS; ++jj) { + dispatch_packet.completion_signal = signals[jj]; + enqueue_dispatch_packet(queue, &dispatch_packet); + } + + // Wait until all dispatch packets finish executing + for (jj = 0; jj < NUM_KERNELS; ++jj) 
{ + hsa_signal_value_t value = hsa_signal_wait_relaxed(signals[jj], + HSA_SIGNAL_CONDITION_EQ, + 0, + UINT64_MAX, + HSA_WAIT_STATE_BLOCKED); + ASSERT(value == 0); + } + + // Destroy signals + for (jj = 0; jj < NUM_KERNELS; ++jj) { + status = hsa_signal_destroy(signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + return 0; +} diff --git a/src/core/memory/atomics/hsa_memory_atomics.c b/src/core/memory/atomics/hsa_memory_atomics.c new file mode 100644 index 0000000..3f562d7 --- /dev/null +++ b/src/core/memory/atomics/hsa_memory_atomics.c @@ -0,0 +1,78 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+#include
+#include "hsa_memory_atomics.h"
+
+DEFINE_TEST(memory_add_atomic)
+DEFINE_TEST(memory_and_atomic)
+DEFINE_TEST(memory_cas_atomic)
+DEFINE_TEST(memory_decrement_atomic)
+DEFINE_TEST(memory_exchange_atomic)
+DEFINE_TEST(memory_increment_atomic)
+DEFINE_TEST(memory_load_store_atomic)
+DEFINE_TEST(memory_maximum_atomic)
+DEFINE_TEST(memory_minimum_atomic)
+DEFINE_TEST(memory_or_atomic)
+DEFINE_TEST(memory_subtract_atomic)
+DEFINE_TEST(memory_xor_atomic)
+
+// Entry point of the memory-atomics test binary: registers every test that
+// was declared with DEFINE_TEST above and runs the suite.
+int main(int argc, char* argv[]) {
+    INITIALIZE_TESTSUITE();
+    ADD_TEST(memory_add_atomic);
+    ADD_TEST(memory_and_atomic);
+    ADD_TEST(memory_cas_atomic);
+    ADD_TEST(memory_decrement_atomic);
+    ADD_TEST(memory_exchange_atomic);
+    ADD_TEST(memory_increment_atomic);
+    ADD_TEST(memory_load_store_atomic);
+    ADD_TEST(memory_maximum_atomic);
+    ADD_TEST(memory_minimum_atomic);
+    ADD_TEST(memory_or_atomic);
+    ADD_TEST(memory_subtract_atomic);
+    // Terminated like every other registration; the statement no longer
+    // depends on ADD_TEST supplying its own trailing semicolon.
+    ADD_TEST(memory_xor_atomic);
+    RUN_TESTS();
+    return 0;
+}
diff --git a/src/core/memory/atomics/hsa_memory_atomics.h b/src/core/memory/atomics/hsa_memory_atomics.h
new file mode 100644
index 0000000..4ac2237
--- /dev/null
+++ b/src/core/memory/atomics/hsa_memory_atomics.h
@@ -0,0 +1,62 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+// Declarations of the memory-atomics test entry points that
+// hsa_memory_atomics.c registers with the test suite.
+//
+// The guard avoids a leading underscore followed by an upper-case letter,
+// which C reserves for the implementation, and matches the spelling used by
+// TEST_HELPER_FUNC_H_.
+#ifndef HSA_MEMORY_ATOMICS_H_
+#define HSA_MEMORY_ATOMICS_H_
+
+// Each entry point takes no arguments. "(void)" makes these real prototypes,
+// so a call that passes arguments is diagnosed instead of silently accepted.
+extern int test_memory_add_atomic(void);
+extern int test_memory_and_atomic(void);
+extern int test_memory_cas_atomic(void);
+extern int test_memory_decrement_atomic(void);
+extern int test_memory_exchange_atomic(void);
+extern int test_memory_increment_atomic(void);
+extern int test_memory_load_store_atomic(void);
+extern int test_memory_maximum_atomic(void);
+extern int test_memory_minimum_atomic(void);
+extern int test_memory_or_atomic(void);
+extern int test_memory_subtract_atomic(void);
+extern int test_memory_xor_atomic(void);
+
+#endif  // HSA_MEMORY_ATOMICS_H_
diff --git a/src/core/memory/atomics/test_helper_func.c b/src/core/memory/atomics/test_helper_func.c
new file mode 100644
index 0000000..b6a334e
--- /dev/null
+++ b/src/core/memory/atomics/test_helper_func.c
@@ -0,0 +1,129 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+// Dispatches the kernel described by symbol_record once on queue and blocks
+// until it has completed.
+//
+// agent:                agent whose memory regions are searched for a kernarg
+//                       region.
+// queue:                queue that receives the dispatch packet.
+// symbol_record:        supplies the kernel object and its group/private
+//                       segment sizes.
+// data, value:          pointers passed to the kernel through the
+//                       kernarg_memory_atomic_t argument block.
+// num_kernel_instances: used both as the work-group size and as the grid
+//                       size in x, i.e. a single work-group is launched.
+//
+// Every failure is reported through ASSERT.
+void launch_memory_atomic_kernel(
+    hsa_agent_t* agent,
+    hsa_queue_t* queue,
+    symbol_record_t* symbol_record,
+    void* data,
+    void* value,
+    int num_kernel_instances) {
+    hsa_status_t status;
+    kernarg_memory_atomic_t arg;
+
+    // Find a memory region that supports kernel arguments. The handle is
+    // cleared first so the assertion fails deterministically when the
+    // iteration callback never selects a region (previously the assertion
+    // read an uninitialized value).
+    hsa_region_t kernarg_region;
+    kernarg_region.handle = 0;
+    hsa_agent_iterate_regions(*agent, get_kernarg_memory_region, &kernarg_region);
+    ASSERT(0 != kernarg_region.handle);
+
+    // Allocate the kernel argument buffer from the correct region
+    kernarg_memory_atomic_t* kernarg_buffer = NULL;
+    status = hsa_memory_allocate(kernarg_region, sizeof(arg), (void**)(&kernarg_buffer));
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Setup the kernarg
+    arg.data = data;
+    arg.value = value;
+    memcpy(kernarg_buffer, &arg, sizeof(arg));
+
+    // Create the completion signal with an initial value of 1; a single
+    // packet is dispatched, and the wait below expects the value to reach 0.
+    hsa_signal_t signal;
+    status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Request a new packet ID
+    uint64_t packet_id = hsa_queue_add_write_index_acquire(queue, 1);
+
+    // Holding on not to write any new packet to the queue if the queue is full.
+    while (packet_id - hsa_queue_load_read_index_relaxed(queue) >= queue->size) {}
+
+    // Compute packet offset
+    hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)queue->base_address
+                                                    + packet_id % queue->size;
+
+    // Clear every field that follows the header. The header itself is left
+    // untouched until the release store below, so the slot is never exposed
+    // with a packet type before the rest of the packet has been written.
+    char* packet_body = (char*)&dispatch_packet->setup;
+    memset(packet_body, 0,
+           sizeof(hsa_kernel_dispatch_packet_t) - (size_t)(packet_body - (char*)dispatch_packet));
+
+    // Initialize the packet
+    dispatch_packet->completion_signal = signal;
+    dispatch_packet->setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+    dispatch_packet->workgroup_size_x = num_kernel_instances;
+    dispatch_packet->workgroup_size_y = 1;
+    dispatch_packet->workgroup_size_z = 1;
+    dispatch_packet->grid_size_x = num_kernel_instances;
+    dispatch_packet->grid_size_y = 1;
+    dispatch_packet->grid_size_z = 1;
+    // Take the segment sizes from the finalized symbol, as test_reinitialize
+    // does, instead of always dispatching with zero-sized segments.
+    dispatch_packet->group_segment_size = symbol_record->group_segment_size;
+    dispatch_packet->private_segment_size = symbol_record->private_segment_size;
+    dispatch_packet->kernel_object = symbol_record->kernel_object;
+    dispatch_packet->kernarg_address = (void*) kernarg_buffer;
+
+    // Compose the header separately and publish it last with release
+    // semantics; this is the only write to the header field.
+    uint16_t header = 0;
+    header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+    header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+    header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+
+    __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE);
+
+    // Signal the door bell to launch the packet
+    hsa_signal_store_release(queue->doorbell_signal, packet_id);
+
+    // Wait until the dispatch is complete
+    while (0 != hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {}
+
+    // Destroy the signal
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_memory_free(kernarg_buffer);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
diff --git a/src/core/memory/atomics/test_helper_func.h
b/src/core/memory/atomics/test_helper_func.h new file mode 100644 index 0000000..0954db8 --- /dev/null +++ b/src/core/memory/atomics/test_helper_func.h @@ -0,0 +1,65 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef TEST_HELPER_FUNC_H_
+#define TEST_HELPER_FUNC_H_
+
+#include
+
+// The kernarg data structure: the argument block that launch_memory_atomic_kernel fills in and copies into the kernarg buffer.
+typedef struct __attribute__ ((aligned(16))) kernarg_memory_atomic_s {  // NOTE(review): the 16-byte alignment presumably matches the kernarg segment requirement -- confirm
+    void* data;   // "data" pointer handed to the kernel; test_memory_add_atomic reads the result back through it
+    void* value;  // "value" pointer handed to the kernel; test_memory_add_atomic stores the operand (1) behind it
+} kernarg_memory_atomic_t;
+
+void launch_memory_atomic_kernel(  // dispatches one kernel on queue and returns after its completion signal reaches 0
+    hsa_agent_t* agent,  // agent whose regions are searched for a kernarg region
+    hsa_queue_t* queue,  // queue that receives the dispatch packet
+    symbol_record_t* symbol_record,  // provides the kernel object to dispatch
+    void* data,  // stored in kernarg_memory_atomic_t.data
+    void* value,  // stored in kernarg_memory_atomic_t.value
+    int num_kernel_instances);  // used as both work-group size and grid size in x
+
+#endif  // TEST_HELPER_FUNC_H_
diff --git a/src/core/memory/atomics/test_memory_add_atomic.c b/src/core/memory/atomics/test_memory_add_atomic.c
new file mode 100644
index 0000000..8f73179
--- /dev/null
+++ b/src/core/memory/atomics/test_memory_add_atomic.c
@@ -0,0 +1,298 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_add_atomic + * Purpose: Test that an agent supports atomic add + * operations from global memory. + * + * Test Description: + * 1. For all agents on the system that support kernel dispatch: + * 2. Load and finalize the memory_add_global set of kernels, + * targeting add operations for all data types and memory ordering types. + * 3. Allocate global memory appropriate for each kernel execution. + * 4. 
Execute each of the kernels on the agent.
+ *
+ * Expected Results: The add kernels should be supported on the agent.
+ * The kernels should be able to get dispatched and execute correctly.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+// Runs every atomic-add kernel of memory_ops.brig on each agent that supports
+// kernel dispatch and exposes a fine-grained global region. Each kernel is
+// launched with num_kernel_instances work-items on a target that starts at 0
+// with an operand of 1, so the target must equal num_kernel_instances
+// afterwards.
+//
+// Returns 0; every failure is reported through ASSERT.
+int test_memory_add_atomic() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("memory_ops.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Check if the agent supports dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Check if this agent has a global memory region; the all-ones handle
+        // is the "nothing found" marker.
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        if ((uint64_t)-1 == global_region.handle) {
+            continue;
+        }
+
+        // Create a queue
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the "data" and "value" buffers; both are sized for the
+        // widest (64-bit) operand and reused by the 32-bit kernels.
+        void* data;
+        void* value;
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Kernel names are grouped in fours (u32, u64, s32, s64) per memory
+        // order and scope; the launch loops below rely on that layout by
+        // stepping through the table with a stride of 4.
+        const int num_symbols = 32;
+        char* symbol_names[num_symbols];
+        int symbol_index = 0;
+
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_rlx_agent_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_rlx_agent_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_rlx_agent_s32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_rlx_agent_s64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_rlx_system_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_rlx_system_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_rlx_system_s32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_rlx_system_s64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scacq_agent_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scacq_agent_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scacq_agent_s32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scacq_agent_s64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scacq_system_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scacq_system_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scacq_system_s32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scacq_system_s64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_screl_agent_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_screl_agent_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_screl_agent_s32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_screl_agent_s64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_screl_system_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_screl_system_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_screl_system_s32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_screl_system_s64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scar_agent_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scar_agent_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scar_agent_s32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scar_agent_s64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scar_system_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scar_system_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scar_system_s32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_add_global_scar_system_s64_kernel";
+
+        // The table must be filled completely; a mismatch would hand
+        // uninitialized name pointers to get_executable_symbols.
+        ASSERT(num_symbols == symbol_index);
+
+        // Get the symbol and the symbol info. The whole record array is
+        // cleared (sizeof the array, not of a single element).
+        symbol_record_t symbol_record[num_symbols];
+        memset(symbol_record, 0, sizeof(symbol_record));
+
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        const int num_kernel_instances = 64;
+
+        // Launch kernels with u32 data type
+        symbol_index = 0;
+        while (symbol_index < num_symbols) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint32_t* data_uint32 = (uint32_t*)data;
+            uint32_t* value_uint32 = (uint32_t*)value;
+            *data_uint32 = 0;
+            *value_uint32 = 1;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], data, value, num_kernel_instances);
+
+            // Verify the kernel executed correctly
+            uint32_t data_expected_u32 = (uint32_t) num_kernel_instances;
+
+            ASSERT(data_expected_u32 == *data_uint32);
+
+            // Increment the symbol index
+            symbol_index += 4;
+        }
+
+        // Launch kernels with u64 data type
+        symbol_index = 1;
+        while (symbol_index < num_symbols) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint64_t* data_uint64 = (uint64_t*)data;
+            uint64_t* value_uint64 = (uint64_t*)value;
+            *data_uint64 = 0;
+            *value_uint64 = 1;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly
+            uint64_t data_expected_u64 = (uint64_t) num_kernel_instances;
+
+            ASSERT(data_expected_u64 == *data_uint64);
+
+            // Increment the symbol index
+            symbol_index += 4;
+        }
+
+        // Launch kernels with s32 data type
+        symbol_index = 2;
+        while (symbol_index < num_symbols) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            int32_t* data_int32 = (int32_t*)data;
+            int32_t* value_int32 = (int32_t*)value;
+            *data_int32 = 0;
+            *value_int32 = 1;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly
+            int32_t data_expected_s32 = (int32_t) num_kernel_instances;
+
+            ASSERT(data_expected_s32 == *data_int32);
+
+            // Increment the symbol index
+            symbol_index += 4;
+        }
+
+        // Launch kernels with s64 data type
+        symbol_index = 3;
+        while (symbol_index < num_symbols) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            int64_t* data_int64 = (int64_t*)data;
+            int64_t* value_int64 = (int64_t*)value;
+            *data_int64 = 0;
+            *value_int64 = 1;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly
+            int64_t data_expected_s64 = (int64_t) num_kernel_instances;
+
+            ASSERT(data_expected_s64 == *data_int64);
+
+            // Increment the symbol index
+            symbol_index += 4;
+        }
+
+        status = hsa_memory_free(data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/memory/atomics/test_memory_and_atomic.c b/src/core/memory/atomics/test_memory_and_atomic.c
new file mode 100644
index 0000000..081a8b7
--- /dev/null
+++ b/src/core/memory/atomics/test_memory_and_atomic.c
@@ -0,0 +1,239 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_and_atomic + * Purpose: Test that an agent supports atomic "and" + * operations from global memory. + * + * Test Description: + * 1. For all agents in the system: + * 2. Load and finalize the memory_and_global set of kernels, + * targeting "and" operations for all data types and memory ordering types. + * 3. Allocate global memory appropriate for each kernel execution. + * 4. Execute each of the kernels on the agent. 
+ *
+ * Expected Results: The atomic "and" kernels should execute as expected.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * Runs the sixteen atomic "and" kernels from memory_ops.brig on every agent
+ * that reports kernel dispatch support and for which the
+ * get_global_memory_region_fine_grained callback selects a region.
+ *
+ * Returns 0 when the end of the function is reached; every check is made
+ * through the ASSERT macro (defined outside this file).
+ */
+int test_memory_and_atomic() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("memory_ops.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Skip agents that do not support kernel dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Look for a fine grained global memory region. The handle is preset
+        // to (uint64_t)-1 and the agent is skipped if it still holds that
+        // sentinel after the iteration.
+        // NOTE(review): the iteration status is not checked - presumably the
+        // callback may stop the iteration early; confirm against the helper.
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        if ((uint64_t)-1 == global_region.handle) {
+            continue;
+        }
+
+        // Create a queue
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the "data" and "value" buffers. Each is 64 bits wide so the
+        // same buffers are reused for the b32 and the b64 kernels.
+        void* data;
+        void* value;
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Kernel names. Even indices hold the b32 kernels and odd indices the
+        // b64 kernels; the launch loops below rely on this interleaving.
+        const int num_symbols = 16;
+        char* symbol_names[num_symbols];
+        int symbol_index = 0;
+
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_rlx_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_rlx_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_rlx_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_rlx_system_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_scacq_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_scacq_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_scacq_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_scacq_system_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_screl_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_screl_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_screl_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_screl_system_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_scar_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_scar_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_scar_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_and_global_scar_system_b64_kernel";
+
+        // Get the symbol and the symbol info.
+        // sizeof(symbol_record) clears the whole array; sizeof(symbol_record_t)
+        // would clear only its first element.
+        symbol_record_t symbol_record[num_symbols];
+        memset(symbol_record, 0, sizeof(symbol_record));
+
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        const int num_kernel_instances = 64;
+
+        // Launch kernels with b32 data type (even symbol indices)
+        for (symbol_index = 0; symbol_index < num_symbols; symbol_index += 2) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint32_t* data_uint32 = (uint32_t*)data;
+            uint32_t* value_uint32= (uint32_t*)value;
+            *data_uint32 = 0xFFFFFFFF;
+            *value_uint32= 0xCCCCCCCC;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index],
+                                        data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly: all-ones AND value is value
+            uint32_t data_expected_u32 = *value_uint32;
+
+            ASSERT(data_expected_u32 == *data_uint32);
+        }
+
+        // Launch kernels with b64 data type (odd symbol indices)
+        for (symbol_index = 1; symbol_index < num_symbols; symbol_index += 2) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint64_t* data_uint64 = (uint64_t*)data;
+            uint64_t* value_uint64= (uint64_t*)value;
+            *data_uint64 = 0xFFFFFFFFFFFFFFFF;
+            *value_uint64= 0xCCCCCCCCCCCCCCCC;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index],
+                                        data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly: all-ones AND value is value
+            uint64_t data_expected_u64 = *value_uint64;
+
+            ASSERT(data_expected_u64 == *data_uint64);
+        }
+
+        status = hsa_memory_free(data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/memory/atomics/test_memory_cas_atomic.c b/src/core/memory/atomics/test_memory_cas_atomic.c
new file mode 100644
index 0000000..df794a1
--- /dev/null
+++ b/src/core/memory/atomics/test_memory_cas_atomic.c
@@ -0,0 +1,240 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_cas_atomic + * Purpose: Test that an agent supports atomic compare-and-swap (cas) + * operations from global memory. + * + * Test Description: + * 1. For all agents on the system that support kernel dispatch: + * 2. Load and finalize the memory_cas_global set of kernels, + * targeting cas operations for all data types and memory ordering types. + * 3. Allocate global memory appropriate for each kernel execution. + * 4. Execute each of the kernels on the agent. + * + * Expected Results: The cas kernels should be supported on the agent. + * The kernels should be able to get dispatched and execute correctly. 
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * Runs the sixteen atomic "cas" kernels from memory_ops.brig on every agent
+ * that reports kernel dispatch support and for which the
+ * get_global_memory_region_fine_grained callback selects a region.
+ *
+ * Returns 0 when the end of the function is reached; every check is made
+ * through the ASSERT macro (defined outside this file).
+ */
+int test_memory_cas_atomic() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("memory_ops.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Skip agents that do not support kernel dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Look for a global memory region that supports fine grained memory.
+        // The handle is preset to (uint64_t)-1 and the agent is skipped if it
+        // still holds that sentinel after the iteration.
+        // NOTE(review): the iteration status is not checked - presumably the
+        // callback may stop the iteration early; confirm against the helper.
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        if ((uint64_t)-1 == global_region.handle) {
+            continue;
+        }
+
+        // Create a queue.
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the "data" and "value" buffers. Each is 64 bits wide so the
+        // same buffers are reused for the b32 and the b64 kernels.
+        void* data;
+        void* value;
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Kernel names. Even indices hold the b32 kernels and odd indices the
+        // b64 kernels; the launch loops below rely on this interleaving.
+        const int num_symbols = 16;
+        char* symbol_names[num_symbols];
+        int symbol_index = 0;
+
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_rlx_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_rlx_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_rlx_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_rlx_system_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_scacq_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_scacq_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_scacq_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_scacq_system_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_screl_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_screl_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_screl_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_screl_system_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_scar_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_scar_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_scar_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_cas_global_scar_system_b64_kernel";
+
+        // Get the symbol and the symbol info.
+        // sizeof(symbol_record) clears the whole array; sizeof(symbol_record_t)
+        // would clear only its first element.
+        symbol_record_t symbol_record[num_symbols];
+        memset(symbol_record, 0, sizeof(symbol_record));
+
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        const int num_kernel_instances = 64;
+
+        // Launch kernels with b32 data type (even symbol indices)
+        for (symbol_index = 0; symbol_index < num_symbols; symbol_index += 2) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint32_t* data_uint32 = (uint32_t*)data;
+            uint32_t* value_uint32= (uint32_t*)value;
+            *data_uint32 = 0xCCCCCCCC;
+            *value_uint32= 0xCCCCCCCC;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index],
+                                        data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly.
+            // NOTE(review): data and value start out equal, so this check
+            // cannot tell a successful swap from a no-op - confirm which
+            // compare operand the kernel uses.
+            uint32_t data_expected_u32 = *value_uint32;
+
+            ASSERT(data_expected_u32 == *data_uint32);
+        }
+
+        // Launch kernels with b64 data type (odd symbol indices)
+        for (symbol_index = 1; symbol_index < num_symbols; symbol_index += 2) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint64_t* data_uint64 = (uint64_t*)data;
+            uint64_t* value_uint64= (uint64_t*)value;
+            *data_uint64 = 0xCCCCCCCCCCCCCCCC;
+            *value_uint64= 0xCCCCCCCCCCCCCCCC;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index],
+                                        data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly (same caveat as the b32 check)
+            uint64_t data_expected_u64 = *value_uint64;
+
+            ASSERT(data_expected_u64 == *data_uint64);
+        }
+
+        status = hsa_memory_free(data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/memory/atomics/test_memory_decrement_atomic.c b/src/core/memory/atomics/test_memory_decrement_atomic.c
new file mode 100644
index 0000000..9dcb153
--- /dev/null
+++ b/src/core/memory/atomics/test_memory_decrement_atomic.c
@@ -0,0 +1,243 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_decrement_atomic + * Purpose: Test that an agent supports atomic decrement + * operations from global memory. + * + * Test Description: + * 1. For all agents on the system that support kernel dispatch: + * 2. Load and finalize the memory_decrement_global set of kernels, + * targeting wrapping decrement (wrapdec) operations for all data types and memory ordering types. + * 3. Allocate global memory appropriate for each kernel execution. + * 4. 
Execute each of the kernels on the agent.
+ *
+ * Expected Results: The decrement kernels should be supported on the agent.
+ * The kernels should be able to get dispatched and execute correctly.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * Runs the sixteen atomic "wrapdec" kernels from memory_ops.brig on every
+ * agent that reports kernel dispatch support and for which the
+ * get_global_memory_region_fine_grained callback selects a region.
+ *
+ * Returns 0 when the end of the function is reached; every check is made
+ * through the ASSERT macro (defined outside this file).
+ */
+int test_memory_decrement_atomic() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("memory_ops.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Skip agents that do not support kernel dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Look for a global memory region that supports fine grained memory.
+        // The handle is preset to (uint64_t)-1 and the agent is skipped if it
+        // still holds that sentinel after the iteration.
+        // NOTE(review): the iteration status is not checked - presumably the
+        // callback may stop the iteration early; confirm against the helper.
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        if ((uint64_t)-1 == global_region.handle) {
+            continue;
+        }
+
+        // Create a queue.
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the "data" and "value" buffers. Each is 64 bits wide so the
+        // same buffers are reused for the u32 and the u64 kernels.
+        void* data;
+        void* value;
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Kernel names. Even indices hold the u32 kernels and odd indices the
+        // u64 kernels; the launch loops below rely on this interleaving.
+        const int num_symbols = 16;
+        char* symbol_names[num_symbols];
+        int symbol_index = 0;
+
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_rlx_agent_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_rlx_agent_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_rlx_system_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_rlx_system_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_scacq_agent_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_scacq_agent_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_scacq_system_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_scacq_system_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_screl_agent_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_screl_agent_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_screl_system_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_screl_system_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_scar_agent_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_scar_agent_u64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_scar_system_u32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_wrapdec_global_scar_system_u64_kernel";
+
+        // Get the symbol and the symbol info.
+        // sizeof(symbol_record) clears the whole array; sizeof(symbol_record_t)
+        // would clear only its first element.
+        symbol_record_t symbol_record[num_symbols];
+        memset(symbol_record, 0, sizeof(symbol_record));
+
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        const int num_kernel_instances = 64;
+
+        // Launch kernels with u32 data type (even symbol indices)
+        for (symbol_index = 0; symbol_index < num_symbols; symbol_index += 2) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint32_t* data_uint32 = (uint32_t*)data;
+            uint32_t* value_uint32= (uint32_t*)value;
+            uint32_t range_max_u32 = 1024;
+            *data_uint32 = range_max_u32;
+            *value_uint32= range_max_u32;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index],
+                                        data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly.
+            // NOTE(review): assumes one decrement per kernel instance with
+            // *value as the wrap limit - confirm against the kernel source.
+            uint32_t data_expected_u32 = (num_kernel_instances >= range_max_u32) ?
+                                         0 : range_max_u32 - num_kernel_instances;
+
+            ASSERT(data_expected_u32 == *data_uint32);
+        }
+
+        // Launch kernels with u64 data type (odd symbol indices)
+        for (symbol_index = 1; symbol_index < num_symbols; symbol_index += 2) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint64_t* data_uint64 = (uint64_t*)data;
+            uint64_t* value_uint64= (uint64_t*)value;
+            uint64_t range_max_u64 = 1024;
+            *data_uint64 = range_max_u64;
+            *value_uint64= range_max_u64;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index],
+                                        data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly (same assumption as the u32 check)
+            uint64_t data_expected_u64 = (num_kernel_instances >= range_max_u64) ?
+                                         0 : range_max_u64 - num_kernel_instances;
+
+            ASSERT(data_expected_u64 == *data_uint64);
+        }
+
+        status = hsa_memory_free(data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/memory/atomics/test_memory_exchange_atomic.c b/src/core/memory/atomics/test_memory_exchange_atomic.c
new file mode 100644
index 0000000..2a4df40
--- /dev/null
+++ b/src/core/memory/atomics/test_memory_exchange_atomic.c
@@ -0,0 +1,239 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release
License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+/*
+ * Test Name: memory_exchange_atomic
+ * Purpose: Test that an agent supports atomic exchange
+ * operations from global memory.
+ *
+ * Test Description:
+ * 1. For all agents on the system that support kernel dispatch:
+ * 2. Load and finalize the memory_exchange_global set of kernels,
+ * targeting exchange operations for all data types and memory ordering types.
+ * 3. Allocate global memory appropriate for each kernel execution.
+ * 4. Execute each of the kernels on the agent.
+ *
+ * Expected Results: The exchange kernels should be supported on the agent.
+ * The kernels should be able to get dispatched and execute correctly.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * Runs the sixteen atomic "exch" kernels from memory_ops.brig on every agent
+ * that reports kernel dispatch support and for which the
+ * get_global_memory_region_fine_grained callback selects a region.
+ *
+ * Returns 0 when the end of the function is reached; every check is made
+ * through the ASSERT macro (defined outside this file).
+ */
+int test_memory_exchange_atomic() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("memory_ops.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Skip agents that do not support kernel dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Look for a global memory region that supports fine grained memory.
+        // The handle is preset to (uint64_t)-1 and the agent is skipped if it
+        // still holds that sentinel after the iteration.
+        // NOTE(review): the iteration status is not checked - presumably the
+        // callback may stop the iteration early; confirm against the helper.
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        if ((uint64_t)-1 == global_region.handle) {
+            continue;
+        }
+
+        // Create a queue.
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the "data" and "value" buffers. Each is 64 bits wide so the
+        // same buffers are reused for the b32 and the b64 kernels.
+        void* data;
+        void* value;
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Kernel names. Even indices hold the b32 kernels and odd indices the
+        // b64 kernels; the launch loops below rely on this interleaving.
+        const int num_symbols = 16;
+        char* symbol_names[num_symbols];
+        int symbol_index = 0;
+
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_rlx_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_rlx_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_rlx_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_rlx_system_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_scacq_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_scacq_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_scacq_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_scacq_system_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_screl_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_screl_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_screl_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_screl_system_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_scar_agent_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_scar_agent_b64_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_scar_system_b32_kernel";
+        symbol_names[symbol_index++] = "&__memory_atomic_exch_global_scar_system_b64_kernel";
+
+        // Get the symbol and the symbol info.
+        // sizeof(symbol_record) clears the whole array; sizeof(symbol_record_t)
+        // would clear only its first element.
+        symbol_record_t symbol_record[num_symbols];
+        memset(symbol_record, 0, sizeof(symbol_record));
+
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        const int num_kernel_instances = 64;
+
+        // Launch kernels with b32 data type (even symbol indices)
+        for (symbol_index = 0; symbol_index < num_symbols; symbol_index += 2) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint32_t* data_uint32 = (uint32_t*)data;
+            uint32_t* value_uint32= (uint32_t*)value;
+            *data_uint32 = 0;
+            *value_uint32= 0xCCCCCCCC;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index],
+                                        data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly: data must now hold value
+            uint32_t data_expected_u32 = *value_uint32;
+
+            ASSERT(data_expected_u32 == *data_uint32);
+        }
+
+        // Launch kernels with b64 data type (odd symbol indices)
+        for (symbol_index = 1; symbol_index < num_symbols; symbol_index += 2) {
+            memset(data, 0, sizeof(uint64_t));
+            memset(value, 0, sizeof(uint64_t));
+
+            uint64_t* data_uint64 = (uint64_t*)data;
+            uint64_t* value_uint64= (uint64_t*)value;
+            *data_uint64 = 0;
+            *value_uint64= 0xCCCCCCCCCCCCCCCC;
+
+            launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index],
+                                        data, value, num_kernel_instances);
+
+            // Verify the kernel is executed correctly: data must now hold value
+            uint64_t data_expected_u64 = *value_uint64;
+
+            ASSERT(data_expected_u64 == *data_uint64);
+        }
+
+        status = hsa_memory_free(data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(value);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/memory/atomics/test_memory_increment_atomic.c b/src/core/memory/atomics/test_memory_increment_atomic.c
new file mode 100644
index 0000000..c1563db
--- /dev/null
+++ b/src/core/memory/atomics/test_memory_increment_atomic.c
@@ -0,0 +1,240 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_increment_atomic + * Purpose: Test that an agent supports atomic increment + * operations on global memory. + * + * Test Description: + * 1. For all agents on the system that support kernel dispatch: + * 2. Load and finalize the memory_increment_global set of kernels, + * targeting wrapping increment operations for all data types and memory ordering types. + * 3. Allocate global memory appropriate for each kernel execution. + * 4. 
Execute each of the kernels on the agent. + * + * Expected Results: The increment kernels should be supported on the agent. + * The kernels should be able to get dispatched and execute correctly. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_increment_atomic() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("memory_ops.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if this agent has a global memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + continue; + } + + // Create a queue. 
+ hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the "data" and "value" buffers + void* data; + void* value; + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_symbols = 16; + char* symbol_names[num_symbols]; + int symbol_index = 0; + + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_rlx_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_rlx_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_rlx_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_rlx_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_scacq_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_scacq_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_scacq_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_scacq_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_screl_agent_u32_kernel"; + symbol_names[symbol_index++] = 
"&__memory_atomic_wrapinc_global_screl_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_screl_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_screl_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_scar_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_scar_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_scar_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_wrapinc_global_scar_system_u64_kernel"; + + // Get the symbol and the symbol info + symbol_record_t symbol_record[num_symbols]; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_kernel_instances = 64; + + // Launch kernels with b32 data type + symbol_index = 0; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint32_t* data_uint32 = (uint32_t*)data; + uint32_t* value_uint32= (uint32_t*)value; + uint32_t range_max_u32 = 1024; + *data_uint32 = 0; + *value_uint32= range_max_u32; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint32_t data_expected_u32 = num_kernel_instances % range_max_u32; + + ASSERT(data_expected_u32 == *data_uint32); + + // Increment the symbol index + symbol_index += 2; + } + + // Launch kernels with b64 data type + symbol_index = 1; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint64_t* data_uint64 = (uint64_t*)data; + uint64_t* value_uint64= (uint64_t*)value; + uint64_t range_max_u64 = 1024; + *data_uint64 = 0; + *value_uint64= range_max_u64; + 
+ launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint64_t data_expected_u64 = num_kernel_instances % range_max_u64; + + ASSERT(data_expected_u64 == *data_uint64); + + // Increment the symbol index + symbol_index += 2; + } + + status = hsa_memory_free(data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/atomics/test_memory_load_store_atomic.c b/src/core/memory/atomics/test_memory_load_store_atomic.c new file mode 100644 index 0000000..3384d44 --- /dev/null +++ b/src/core/memory/atomics/test_memory_load_store_atomic.c @@ -0,0 +1,231 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_load_store_atomic + * Purpose: Test that an agent supports atomic load/store + * operations on global memory. + * + * Test Description: + * 1. For all agents on the system that support kernel dispatch: + * 2. Load and finalize the memory_load_store_global set of kernels, + * targeting load/store operations for all data types and memory ordering types. + * 3. Allocate global memory appropriate for each kernel execution. + * 4. 
Execute each of the kernels on the agent. + * + * Expected Results: The load/store kernels should be supported on the agent. + * The kernels should be able to get dispatched and execute correctly. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" +#include + +int test_memory_load_store_atomic() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("memory_ops.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if this agent has a global memory region the supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + continue; + } + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the "data" and "value" buffers + void* data; + void* value; + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, 
sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_symbols = 8; + char* symbol_names[num_symbols]; + int symbol_index = 0; + + symbol_names[symbol_index++] = "&__memory_atomic_ld_global_rlx_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_ld_global_rlx_agent_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_ld_global_rlx_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_ld_global_rlx_system_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_ld_global_scacq_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_ld_global_scacq_agent_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_ld_global_scacq_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_ld_global_scacq_system_b64_kernel"; + + // Get the symbol and the symbol info + symbol_record_t symbol_record[num_symbols]; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_kernel_instances = 64; + + // Launch kernels with b32 data type + symbol_index = 0; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint32_t* data_uint32 = (uint32_t*)data; + uint32_t* value_uint32= (uint32_t*)value; + *data_uint32 = 0xFFFFFFFF; + *value_uint32= 0xCCCCCCCC; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], data, value, num_kernel_instances); + + // Verify the kernel is 
executed correctly + uint32_t data_expected_u32 = *value_uint32; + + if (data_expected_u32 != *data_uint32) { + ASSERT(data_expected_u32 == *data_uint32); + } + + // Increment the symbol index + symbol_index += 2; + } + + // Launch kernels with b64 data type + symbol_index = 1; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint64_t* data_uint64 = (uint64_t*)data; + uint64_t* value_uint64= (uint64_t*)value; + *data_uint64 = 0xFFFFFFFFFFFFFFFF; + *value_uint64= 0xCCCCCCCCCCCCCCCC; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint64_t data_expected_u64 = *value_uint64; + + ASSERT(data_expected_u64 == *data_uint64); + + // Increment the symbol index + symbol_index += 2; + } + + status = hsa_memory_free(data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/atomics/test_memory_maximum_atomic.c b/src/core/memory/atomics/test_memory_maximum_atomic.c new file mode 100644 index 0000000..cf388f4 --- /dev/null +++ b/src/core/memory/atomics/test_memory_maximum_atomic.c @@ -0,0 +1,300 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: memory_maximum_atomic + * Purpose: Test that if an agent supports atomic maximum + * operations from global memory. + * + * Test Description: + * 1. For all agents on the system that support kernel dispatch: + * 2. Load and finalize the memory_maximum_global set of kernels, + * targeting add operations for all data types and memory ordering types. + * 3. Allocate global memory appropriate for each kernel execution. + * 4. Execute each of the kernels on the agent. + * + * Expected Results: The maximum kernels should be supported on the agent. + * The kernels should be able to get dispatched and execute correctly. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_maximum_atomic() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("memory_ops.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if this agent has a global memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + continue; + } + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the 
"data" and "value" buffers + void* data; + void* value; + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_symbols = 32; + char* symbol_names[num_symbols]; + int symbol_index = 0; + + symbol_names[symbol_index++] = "&__memory_atomic_max_global_rlx_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_rlx_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_rlx_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_rlx_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_rlx_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_rlx_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_rlx_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_rlx_system_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scacq_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scacq_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scacq_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scacq_agent_s64_kernel"; + symbol_names[symbol_index++] = 
"&__memory_atomic_max_global_scacq_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scacq_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scacq_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scacq_system_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_screl_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_screl_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_screl_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_screl_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_screl_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_screl_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_screl_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_screl_system_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scar_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scar_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scar_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scar_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scar_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scar_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scar_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_max_global_scar_system_s64_kernel"; + + // Get the symbol and the symbol info + symbol_record_t symbol_record[num_symbols]; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record); + ASSERT(HSA_STATUS_SUCCESS 
== status); + + const int num_kernel_instances = 64; + + // Launch kernels with u32 data type + symbol_index = 0; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint32_t* data_uint32 = (uint32_t*)data; + uint32_t* value_uint32= (uint32_t*)value; + *data_uint32 = 0; + *value_uint32= 1; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint32_t data_expected_u32 = *value_uint32; + + ASSERT(data_expected_u32 == *data_uint32); + + // Increment the symbol index + symbol_index += 4; + } + + // Launch kernels with u64 data type + symbol_index = 1; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint64_t* data_uint64 = (uint64_t*)data; + uint64_t* value_uint64= (uint64_t*)value; + *data_uint64 = 0; + *value_uint64= 1; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint64_t data_expected_u64 = *value_uint64; + + ASSERT(data_expected_u64 == *data_uint64); + + // Increment the symbol index + symbol_index += 4; + } + + // Launch kernels with s32 data type + symbol_index = 2; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + int32_t* data_int32 = (int32_t*)data; + int32_t* value_int32= (int32_t*)value; + *data_int32 = 0; + *value_int32= 1; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + int32_t data_expected_s32 = *value_int32; + + ASSERT(data_expected_s32 == *data_int32); + + // Increment the symbol index + symbol_index += 4; + } + + // Launch kernels with s64 data type + 
symbol_index = 3; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + int64_t* data_int64 = (int64_t*)data; + int64_t* value_int64= (int64_t*)value; + *data_int64 = 0; + *value_int64= 1; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + int64_t data_expected_s64 = *value_int64; + + ASSERT(data_expected_s64 == *data_int64); + + // Increment the symbol index + symbol_index += 4; + } + + status = hsa_memory_free(data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/atomics/test_memory_minimum_atomic.c b/src/core/memory/atomics/test_memory_minimum_atomic.c new file mode 100644 index 0000000..77173c6 --- /dev/null +++ b/src/core/memory/atomics/test_memory_minimum_atomic.c @@ -0,0 +1,300 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: memory_minimum_atomic
+ * Purpose: Test that an agent supports atomic minimum
+ * operations on global memory.
+ *
+ * Test Description:
+ * 1. For all agents on the system that support kernel dispatch:
+ * 2.
Load and finalize the memory_minimum_global set of kernels, + * targeting add operations for all data types and memory ordering types. + * 3. Allocate global memory appropriate for each kernel execution. + * 4. Execute each of the kernels on the agent. + * + * Expected Results: The minimum kernels should be supported on the agent. + * The kernels should be able to get dispatched and execute correctly. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_minimum_atomic() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("memory_ops.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if this agent has a global memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + continue; + } + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the "data" and "value" buffers + void* data; + void* value; + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(uint64_t), 
(void**) &value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_symbols = 32; + char* symbol_names[num_symbols]; + int symbol_index = 0; + + symbol_names[symbol_index++] = "&__memory_atomic_min_global_rlx_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_rlx_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_rlx_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_rlx_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_rlx_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_rlx_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_rlx_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_rlx_system_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scacq_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scacq_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scacq_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scacq_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scacq_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scacq_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scacq_system_s32_kernel"; + symbol_names[symbol_index++] = 
"&__memory_atomic_min_global_scacq_system_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_screl_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_screl_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_screl_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_screl_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_screl_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_screl_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_screl_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_screl_system_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scar_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scar_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scar_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scar_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scar_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scar_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scar_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_min_global_scar_system_s64_kernel"; + + // Get the symbol and the symbol info + symbol_record_t symbol_record[num_symbols]; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_kernel_instances = 64; + + // Launch kernels with u32 data type + symbol_index = 0; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint32_t* data_uint32 = 
(uint32_t*)data; + uint32_t* value_uint32= (uint32_t*)value; + *data_uint32 = 1; + *value_uint32= 0; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint32_t data_expected_u32 = *value_uint32; + + ASSERT(data_expected_u32 == *data_uint32); + + // Increment the symbol index + symbol_index += 4; + } + + // Launch kernels with u64 data type + symbol_index = 1; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint64_t* data_uint64 = (uint64_t*)data; + uint64_t* value_uint64= (uint64_t*)value; + *data_uint64 = 1; + *value_uint64= 0; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint64_t data_expected_u64 = *value_uint64; + + ASSERT(data_expected_u64 == *data_uint64); + + // Increment the symbol index + symbol_index += 4; + } + + // Launch kernels with s32 data type + symbol_index = 2; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + int32_t* data_int32 = (int32_t*)data; + int32_t* value_int32= (int32_t*)value; + *data_int32 = 1; + *value_int32= 0; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + int32_t data_expected_s32 = *value_int32; + + ASSERT(data_expected_s32 == *data_int32); + + // Increment the symbol index + symbol_index += 4; + } + + // Launch kernels with s64 data type + symbol_index = 3; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + int64_t* data_int64 = (int64_t*)data; + int64_t* value_int64= (int64_t*)value; + *data_int64 = 1; + *value_int64= 0; + + 
launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + int64_t data_expected_s64 = *value_int64; + + ASSERT(data_expected_s64 == *data_int64); + + // Increment the symbol index + symbol_index += 4; + } + + status = hsa_memory_free(data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/atomics/test_memory_or_atomic.c b/src/core/memory/atomics/test_memory_or_atomic.c new file mode 100644 index 0000000..74a7790 --- /dev/null +++ b/src/core/memory/atomics/test_memory_or_atomic.c @@ -0,0 +1,238 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: memory_or_atomic
+ * Purpose: Test that an agent supports atomic or
+ * operations on global memory.
+ *
+ * Test Description:
+ * 1. For all agents on the system that support kernel dispatch:
+ * 2. Load and finalize the memory_or_global set of kernels,
+ * targeting or operations for all data types and memory ordering types.
+ * 3. Allocate global memory appropriate for each kernel execution.
+ * 4.
Execute each of the kernels on the agent. + * + * Expected Results: The add kernels should be supported on the agent. + * The kernels should be able to get dispatched and execute correctly. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_or_atomic() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("memory_ops.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if this agent has a global memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + continue; + } + + // Create a queue. 
+ hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the "data" and "value" buffers + void* data; + void* value; + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_symbols = 16; + char* symbol_names[num_symbols]; + int symbol_index = 0; + + symbol_names[symbol_index++] = "&__memory_atomic_or_global_rlx_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_rlx_agent_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_rlx_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_rlx_system_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_scacq_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_scacq_agent_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_scacq_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_scacq_system_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_screl_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_screl_agent_b64_kernel"; + symbol_names[symbol_index++] = 
"&__memory_atomic_or_global_screl_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_screl_system_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_scar_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_scar_agent_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_scar_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_or_global_scar_system_b64_kernel"; + + // Get the symbol and the symbol info + symbol_record_t symbol_record[num_symbols]; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_kernel_instances = 64; + + // Launch kernels with b32 data type + symbol_index = 0; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint32_t* data_uint32 = (uint32_t*)data; + uint32_t* value_uint32= (uint32_t*)value; + *data_uint32 = 0; + *value_uint32= 0xCCCCCCCC; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint32_t data_expected_u32 = *value_uint32; + + ASSERT(data_expected_u32 == *data_uint32); + + // Increment the symbol index + symbol_index += 2; + } + + // Launch kernels with b64 data type + symbol_index = 1; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint64_t* data_uint64 = (uint64_t*)data; + uint64_t* value_uint64= (uint64_t*)value; + *data_uint64 = 0; + *value_uint64= 0xCCCCCCCCCCCCCCCC; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint64_t data_expected_u64 = 
*value_uint64; + + ASSERT(data_expected_u64 == *data_uint64); + + // Increment the symbol index + symbol_index += 2; + } + + status = hsa_memory_free(data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/atomics/test_memory_subtract_atomic.c b/src/core/memory/atomics/test_memory_subtract_atomic.c new file mode 100644 index 0000000..55297b3 --- /dev/null +++ b/src/core/memory/atomics/test_memory_subtract_atomic.c @@ -0,0 +1,304 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: memory_subtract_atomic
+ * Purpose: Test that an agent supports atomic subtract
+ * operations on global memory.
+ *
+ * Test Description:
+ * 1. For all agents on the system that support kernel dispatch:
+ * 2. Load and finalize the memory_subtract_global set of kernels,
+ * targeting subtract operations for all data types and memory ordering types.
+ * 3. Allocate global memory appropriate for each kernel execution.
+ * 4.
Execute each of the kernels on the agent. + * + * Expected Results: The subtract kernels should be supported on the agent. + * The kernels should be able to get dispatched and execute correctly. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_subtract_atomic() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("memory_ops.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if this agent has a global memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + continue; + } + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the "data" and "value" buffers + void* data; + void* value; + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + 
hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_symbols = 32; + char* symbol_names[num_symbols]; + int symbol_index = 0; + + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_rlx_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_rlx_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_rlx_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_rlx_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_rlx_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_rlx_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_rlx_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_rlx_system_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scacq_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scacq_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scacq_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scacq_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scacq_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scacq_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scacq_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scacq_system_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_screl_agent_u32_kernel"; + symbol_names[symbol_index++] = 
"&__memory_atomic_sub_global_screl_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_screl_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_screl_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_screl_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_screl_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_screl_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_screl_system_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scar_agent_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scar_agent_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scar_agent_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scar_agent_s64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scar_system_u32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scar_system_u64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scar_system_s32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_sub_global_scar_system_s64_kernel"; + + // Get the symbol and the symbol info + symbol_record_t symbol_record[num_symbols]; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + status = get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_kernel_instances = 64; + + // Launch kernels with u32 data type + symbol_index = 0; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint32_t* data_uint32 = (uint32_t*)data; + uint32_t* value_uint32= (uint32_t*)value; + uint32_t original_u32 = 1024; + *data_uint32 = original_u32; + *value_uint32 = 1; + + 
launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint32_t data_expected_u32 = original_u32 - (uint32_t)num_kernel_instances; + + ASSERT(data_expected_u32 == *data_uint32); + + // Increment the symbol index + symbol_index += 4; + } + + // Launch kernels with u64 data type + symbol_index = 1; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint64_t* data_uint64 = (uint64_t*)data; + uint64_t* value_uint64= (uint64_t*)value; + uint64_t original_u64 = 1024; + *data_uint64 = original_u64; + *value_uint64= 1; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint64_t data_expected_u64 = original_u64 - (uint64_t)num_kernel_instances; + + ASSERT(data_expected_u64 == *data_uint64); + + // Increment the symbol index + symbol_index += 4; + } + + // Launch kernels with s32 data type + symbol_index = 2; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + int32_t* data_int32 = (int32_t*)data; + int32_t* value_int32= (int32_t*)value; + int32_t original_s32 = 1024; + *data_int32 = original_s32; + *value_int32= 1; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + int32_t data_expected_s32 = original_s32 - (int32_t)num_kernel_instances; + + ASSERT(data_expected_s32 == *data_int32); + + // Increment the symbol index + symbol_index += 4; + } + + // Launch kernels with s64 data type + symbol_index = 3; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + int64_t* data_int64 = (int64_t*)data; + int64_t* 
value_int64= (int64_t*)value; + int64_t original_s64 = 1024; + *data_int64 = original_s64; + *value_int64= 1; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + int64_t data_expected_s64 = original_s64 - (int64_t)num_kernel_instances; + + ASSERT(data_expected_s64 == *data_int64); + + // Increment the symbol index + symbol_index += 4; + } + + status = hsa_memory_free(data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/atomics/test_memory_xor_atomic.c b/src/core/memory/atomics/test_memory_xor_atomic.c new file mode 100644 index 0000000..4cabcff --- /dev/null +++ b/src/core/memory/atomics/test_memory_xor_atomic.c @@ -0,0 +1,239 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_xor_atomic + * Purpose: Test that an agent supports atomic xor + * operations on global memory. + * + * Test Description: + * 1. For all agents on the system that support kernel dispatch: + * 2. Load and finalize the memory_xor_global set of kernels, + * targeting xor operations for all data types and memory ordering types. + * 3. Allocate global memory appropriate for each kernel execution. + * 4. 
Execute each of the kernels on the agent. + * + * Expected Results: The xor kernels should be supported on the agent. + * The kernels should be able to get dispatched and execute correctly. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_xor_atomic() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("memory_ops.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if this agent has a global memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + continue; + } + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the "data" and "value" buffers + void* data; + void* value; + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(uint64_t), (void**) &value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + 
hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_symbols = 16; + char* symbol_names[num_symbols]; + int symbol_index = 0; + + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_rlx_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_rlx_agent_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_rlx_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_rlx_system_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_scacq_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_scacq_agent_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_scacq_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_scacq_system_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_screl_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_screl_agent_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_screl_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_screl_system_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_scar_agent_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_scar_agent_b64_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_scar_system_b32_kernel"; + symbol_names[symbol_index++] = "&__memory_atomic_xor_global_scar_system_b64_kernel"; + + // Get the symbol and the symbol info + symbol_record_t symbol_record[num_symbols]; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + status = 
get_executable_symbols(executable, agent_list.agents[ii], 0, num_symbols, symbol_names, symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Use odd number of kernel instances for XOR operation + const int num_kernel_instances = 63; + + // Launch kernels with b32 data type + symbol_index = 0; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint32_t* data_uint32 = (uint32_t*)data; + uint32_t* value_uint32= (uint32_t*)value; + *data_uint32 = 0x0000FFFF; + *value_uint32= 0xCCCCCCCC; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint32_t data_expected_u32 = 0x0000FFFF ^ 0xCCCCCCCC; // 0xCCCC3333; + + ASSERT(data_expected_u32 == *data_uint32); + + // Increment the symbol index + symbol_index += 2; + } + + // Launch kernels with b64 data type + symbol_index = 1; + while (symbol_index < num_symbols) { + memset(data, 0, sizeof(uint64_t)); + memset(value, 0, sizeof(uint64_t)); + + uint64_t* data_uint64 = (uint64_t*)data; + uint64_t* value_uint64= (uint64_t*)value; + *data_uint64 = 0x0000FFFF0000FFFF; + *value_uint64= 0xCCCCCCCCCCCCCCCC; + + launch_memory_atomic_kernel(&(agent_list.agents[ii]), queue, &symbol_record[symbol_index], + data, value, num_kernel_instances); + + // Verify the kernel is executed correctly + uint64_t data_expected_u64 = 0x0000FFFF0000FFFF ^ 0xCCCCCCCCCCCCCCCC; // 0xCCCC3333CCCC3333; + + ASSERT(data_expected_u64 == *data_uint64); + + // Increment the symbol index + symbol_index += 2; + } + + status = hsa_memory_free(data); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(value); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/hsa_memory.c b/src/core/memory/hsa_memory.c new file mode 100644 index 0000000..a3e3bfb --- /dev/null +++ b/src/core/memory/hsa_memory.c @@ -0,0 +1,108 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include "hsa_memory.h" + +DEFINE_TEST(memory_allocate_max_size); +DEFINE_TEST(memory_allocate_zero_size); +DEFINE_TEST(memory_assign_agent); +DEFINE_TEST(memory_allocated_vector_copy_heap); +DEFINE_TEST(memory_allocated_vector_copy_stack); +DEFINE_TEST(memory_group_dynamic_allocation); +DEFINE_TEST(memory_copy_allocated_to_allocated) +DEFINE_TEST(memory_copy_allocated_to_registered) +DEFINE_TEST(memory_copy_registered_to_allocated) +DEFINE_TEST(memory_copy_registered_to_registered) +DEFINE_TEST(memory_vector_copy_between_stack_and_heap); +DEFINE_TEST(memory_vector_copy_heap_not_registered); +DEFINE_TEST(memory_vector_copy_heap_registered); +DEFINE_TEST(memory_vector_copy_stack_not_registered); +DEFINE_TEST(memory_vector_copy_stack_registered); +DEFINE_TEST(memory_minimum_region); +DEFINE_TEST(memory_region_concurrent_get_info); +DEFINE_TEST(memory_region_alignment); +DEFINE_TEST(memory_register_subrange); +DEFINE_TEST(memory_concurrent_allocate); +DEFINE_TEST(memory_concurrent_free); +DEFINE_TEST(memory_concurrent_register); +DEFINE_TEST(memory_concurrent_deregister); +DEFINE_TEST(memory_basic_allocate_free); +DEFINE_TEST(memory_basic_register_deregister); +DEFINE_TEST(memory_coherence_after_register); +DEFINE_TEST(memory_copy_system_and_global); + +int main(int argc, 
char* argv[]) { + INITIALIZE_TESTSUITE(); + ADD_TEST(memory_allocate_max_size); + ADD_TEST(memory_allocate_zero_size); + ADD_TEST(memory_assign_agent); + ADD_TEST(memory_allocated_vector_copy_heap); + ADD_TEST(memory_allocated_vector_copy_stack); + ADD_TEST(memory_group_dynamic_allocation); + ADD_TEST(memory_copy_allocated_to_allocated); + ADD_TEST(memory_copy_allocated_to_registered); + ADD_TEST(memory_copy_registered_to_allocated); + ADD_TEST(memory_copy_registered_to_registered); + ADD_TEST(memory_vector_copy_between_stack_and_heap); + ADD_TEST(memory_vector_copy_heap_not_registered); + ADD_TEST(memory_vector_copy_heap_registered); + ADD_TEST(memory_vector_copy_stack_not_registered); + ADD_TEST(memory_vector_copy_stack_registered); + ADD_TEST(memory_minimum_region); + ADD_TEST(memory_region_concurrent_get_info); + ADD_TEST(memory_region_alignment); + ADD_TEST(memory_register_subrange); + ADD_TEST(memory_concurrent_allocate); + ADD_TEST(memory_concurrent_free); + ADD_TEST(memory_concurrent_register); + ADD_TEST(memory_concurrent_deregister); + ADD_TEST(memory_basic_allocate_free); + ADD_TEST(memory_basic_register_deregister); + ADD_TEST(memory_coherence_after_register); + ADD_TEST(memory_copy_system_and_global); + RUN_TESTS(); + return 0; +} diff --git a/src/core/memory/hsa_memory.h b/src/core/memory/hsa_memory.h new file mode 100644 index 0000000..57b4089 --- /dev/null +++ b/src/core/memory/hsa_memory.h @@ -0,0 +1,77 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
/*
 * Entry points of the memory tests, one per test_memory_*.c source file.
 * Each takes no arguments and returns an int (the implementations visible in
 * this suite return 0 on completion).
 *
 * The parameter lists are spelled (void): in C an empty pair of parentheses
 * declares a function without a prototype, which disables argument checking
 * at the call site.
 */
extern int test_memory_allocate_max_size(void);
extern int test_memory_allocate_zero_size(void);
extern int test_memory_assign_agent(void);
extern int test_memory_allocated_vector_copy_heap(void);
extern int test_memory_allocated_vector_copy_stack(void);
extern int test_memory_group_dynamic_allocation(void);
extern int test_memory_copy_allocated_to_allocated(void);
extern int test_memory_copy_allocated_to_registered(void);
extern int test_memory_copy_registered_to_allocated(void);
extern int test_memory_copy_registered_to_registered(void);
extern int test_memory_vector_copy_between_stack_and_heap(void);
extern int test_memory_vector_copy_heap_not_registered(void);
extern int test_memory_vector_copy_heap_registered(void);
extern int test_memory_vector_copy_stack_not_registered(void);
extern int test_memory_vector_copy_stack_registered(void);
extern int test_memory_minimum_region(void);
extern int test_memory_region_concurrent_get_info(void);
extern int test_memory_region_alignment(void);
extern int test_memory_register_subrange(void);
extern int test_memory_concurrent_allocate(void);
extern int test_memory_concurrent_free(void);
extern int test_memory_concurrent_register(void);
extern int test_memory_concurrent_deregister(void);
extern int test_memory_basic_allocate_free(void);
extern int test_memory_basic_register_deregister(void);
extern int test_memory_coherence_after_register(void);
extern int test_memory_copy_system_and_global(void);
Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +// launch the vector_copy kernel, and wait for the kernel to finish +void launch_vector_copy_kernel( + hsa_queue_t* queue, + uint32_t data_size, + uint64_t kernel_object, + void* kernarg_address + ) { + hsa_status_t status; + + // create a signal with initial value of 1 + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // request a new packet ID + uint64_t packet_id = hsa_queue_add_write_index_acquire(queue, 1); + + // holding on not to write any new packet to the queue if the queue is full. + while (packet_id - hsa_queue_load_read_index_relaxed(queue) >= queue->size) {} + + // compute packet offset + hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)queue->base_address + + packet_id % queue->size; + + // initialize the packet + memset(dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet->completion_signal = signal; + dispatch_packet->setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = (uint16_t)256; + dispatch_packet->workgroup_size_y = (uint16_t)1; + dispatch_packet->workgroup_size_z = (uint16_t)1; + dispatch_packet->grid_size_x = (uint32_t)data_size; + dispatch_packet->grid_size_y = 1; + dispatch_packet->grid_size_z = 1; + dispatch_packet->kernel_object = kernel_object; + dispatch_packet->kernarg_address = (void*) kernarg_address; + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE); + + // Signal the door bell to launch the packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + + // Wait until the kernel complete + while 
(0 != hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {} + + hsa_signal_destroy(signal); + + return; +} + +// Clear the data, launch the kernel, and wait for the execution to complete. +void launch_init_data_kernel( + hsa_queue_t* queue, + uint32_t* data, + uint32_t total_size, + uint32_t value, + int dim, + hsa_dim3_t grid_dim, + hsa_dim3_t workgroup_dim, + uint64_t kernel_object, + void* kernarg_address) { + hsa_status_t status; + + // clear the data + memset(data, 0, sizeof(uint32_t) * total_size); + + // the kernarg data structure + kernarg_init_data_t args; + + // setup the kernarg + args.data = data; + args.value = value; + args.row_pitch = grid_dim.x; + args.slice_pitch = grid_dim.x * grid_dim.y; + memcpy((void*)kernarg_address, &args, sizeof(args)); + + // create a signal with initial value of 1 + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // request a new packet ID + uint64_t packet_id = hsa_queue_add_write_index_acquire(queue, 1); + + // holding on not to write any new packet to the queue if the queue is full. 
+ while (packet_id - hsa_queue_load_read_index_relaxed(queue) >= queue->size) {} + + // compute packet offset + hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)queue->base_address + + packet_id % queue->size; + + // initialize the packet + memset(dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet->completion_signal = signal; + dispatch_packet->setup = dim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = (uint16_t)workgroup_dim.x; + dispatch_packet->workgroup_size_y = (uint16_t)workgroup_dim.y; + dispatch_packet->workgroup_size_z = (uint16_t)workgroup_dim.z; + dispatch_packet->grid_size_x = grid_dim.x; + dispatch_packet->grid_size_y = grid_dim.y; + dispatch_packet->grid_size_z = grid_dim.z; + dispatch_packet->kernel_object = kernel_object; + dispatch_packet->kernarg_address = (void*) kernarg_address; + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE); + + // signal the door bell to launch the packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + + // wait until the kernel complete + while (0 != hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {} + + hsa_signal_destroy(signal); + + return; +} diff --git a/src/core/memory/test_helper_func.h b/src/core/memory/test_helper_func.h new file mode 100644 index 0000000..850c1a9 --- /dev/null +++ b/src/core/memory/test_helper_func.h @@ -0,0 +1,110 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of 
Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
/*
 * Shared declarations for the memory tests: kernel argument layouts and the
 * helpers that dispatch the vector_copy and init_data kernels.
 */

// Alignment, in bytes, applied to every kernarg structure declared below.
#define ARGUMENT_ALIGN_BYTES 16

// vector_copy kernarg
// NOTE(review): the member order presumably has to match the kernel's
// argument list - confirm before reordering.
typedef struct __attribute__ ((aligned(ARGUMENT_ALIGN_BYTES))) kernarg_vector_copy_s {
    void* in;
    void* out;
} kernarg_vector_copy_t;

// launch the vector_copy kernel, and wait for the kernel to finish
// (the definition names the third parameter kernel_object)
void launch_vector_copy_kernel(
    hsa_queue_t* queue,
    uint32_t data_size,
    uint64_t kernel_obj_address,
    void* kernarg_address);

// struct to store memory region info
// NOTE(review): the fields presumably mirror the HSA_REGION_INFO_*
// attributes of the same names - confirm against get_region_info().
typedef struct region_info_s {
    hsa_region_segment_t segment;
    hsa_region_global_flag_t flags;
    size_t size;
    size_t alloc_max_size;
    bool alloc_allowed;
    size_t alloc_granule;
    // "alignemnt" is misspelt, but the name is part of the interface used by
    // callers, so it is kept as is.
    size_t alloc_alignemnt;
} region_info_t;


// get memory region info
void get_region_info(hsa_region_t region, region_info_t* info);

// init_data kernarg
// launch_init_data_kernel() fills row_pitch with grid_dim.x and slice_pitch
// with grid_dim.x * grid_dim.y.
typedef struct __attribute__ ((aligned(ARGUMENT_ALIGN_BYTES))) kernarg_init_data_s {
    void* data;
    uint32_t value;
    uint32_t row_pitch;
    uint32_t slice_pitch;
} kernarg_init_data_t;

// clear the data, launch the init_data kernel, and wait for the execution to complete.
// (the definition names the eighth parameter kernel_object)
void launch_init_data_kernel(
    hsa_queue_t* queue,
    uint32_t* data,
    uint32_t total_size,
    uint32_t value,
    int dim,
    hsa_dim3_t grid_dim,
    hsa_dim3_t workgroup_dim,
    uint64_t kernel_obj_address,
    void* kernarg_address);

// The kernarg data structure for group_memory_dynamic_allocation
typedef struct __attribute__ ((aligned(ARGUMENT_ALIGN_BYTES))) kernarg_group_memory_dynamic_alloc_s {
    uint32_t* data_in;
    uint32_t* data_out;
    uint32_t grp_offset;
    uint32_t count;
} kernarg_group_memory_dynamic_alloc_t;
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_allocate_max_size + * + * Purpose: Verify that hsa_memory_allocate can't allocate more than MAX_ALLOC_SIZE memory once. + * + * Test Description: + * + * 1. Iterate over all of the agents in the system. + * + * 2. For each agent, iterate over all of the memory regions. + * + * 3. Get the attribute of HSA_REGION_INFO_ALLOC_MAX_SIZE. + * + * 4. Call hsa_memory_allocate with a size greater than HSA_REGION_ALLOC_MAX_SIZE. + * + * 5. Check that the API return the correct error code. 
+ * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_allocate_max_size() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + hsa_device_type_t agent_type; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + hsa_agent_t agent = agent_list.agents[ii]; + + // Get the list of regions + struct region_list_s region_list; + get_region_list(agent, ®ion_list); + + int jj; + for (jj = 0; jj < region_list.num_regions; ++jj) { + // Determine if allocation requests are allowed + bool allowed; + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, &allowed); + + if (!allowed) { + continue; + } + + size_t max_size; + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_ALLOC_MAX_SIZE, &max_size); + + char *buf; + status = hsa_memory_allocate(region_list.regions[jj], (max_size + 16), (void**) &buf); + ASSERT(status == HSA_STATUS_ERROR_INVALID_ALLOCATION); + } + + free_region_list(®ion_list); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_allocate_zero_size.c b/src/core/memory/test_memory_allocate_zero_size.c new file mode 100644 index 0000000..65e3c5a --- /dev/null +++ b/src/core/memory/test_memory_allocate_zero_size.c @@ -0,0 +1,113 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_allocate_zero_size + * + * Purpose: Verify that requesting an allocation of size 0 is rejected as an invalid argument. + * + * Test Description: + * + * 1. Iterate over all of the agents in the system. + * + * 2. For each agent, iterate over all of the memory regions. + * + * 3. Call hsa_memory_allocate with a size of 0. + * + * 4. 
Check that the API returns HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * For every region of every agent that permits runtime allocation, request
+ * a zero-byte allocation and check that hsa_memory_allocate answers
+ * HSA_STATUS_ERROR_INVALID_ARGUMENT.
+ *
+ * Returns 0 when the end of the test is reached; failures are reported
+ * through ASSERT.
+ */
+int test_memory_allocate_zero_size() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        hsa_agent_t agent = agent_list.agents[ii];
+
+        // Get the list of regions
+        struct region_list_s region_list;
+        get_region_list(agent, &region_list);
+
+        int jj;
+        for (jj = 0; jj < region_list.num_regions; ++jj) {
+            // Determine if allocation requests are allowed. The query status
+            // is checked so that "allowed" is never read uninitialized.
+            bool allowed = false;
+            status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, &allowed);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+
+            if (!allowed) {
+                continue;
+            }
+
+            char *buf;
+            status = hsa_memory_allocate(region_list.regions[jj], 0, (void**) &buf);
+            if (HSA_STATUS_SUCCESS == status) {
+                // Unexpected success: release the buffer before reporting the failure.
+                hsa_memory_free(buf);
+            }
+            ASSERT(status == HSA_STATUS_ERROR_INVALID_ARGUMENT);
+        }
+
+        free_region_list(&region_list);
+    }
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/memory/test_memory_allocated_vector_copy_heap.c b/src/core/memory/test_memory_allocated_vector_copy_heap.c
new file mode 100644
index 0000000..4a82c9d
--- /dev/null
+++ b/src/core/memory/test_memory_allocated_vector_copy_heap.c
@@ -0,0 +1,285 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_allocated_vector_copy_heap + * + * Purpose: Test that memory allocated by hsa runtime can be accessed by + * component, and that kernel can copy data from/to allocated memory + * to/from heap, which is either registered or not. + * + * Test Description: + * + * 1. Repeat the following test for every agent that supports the full profile. + * + * 2. Allocate two memory blocks from the system's heap memory. 
Use standard system
+ * memory APIs (i.e. malloc()).
+ *
+ * 3. Allocate the same sized memory block using the HSA runtime APIs, allocating the
+ * memory in the global segment.
+ *
+ * 4. Initialize the first buffer to non-zero values, and the other buffers to zero.
+ *
+ * 5. Launch a kernel that copies data from the first buffer to last buffer, and then
+ * back to the second.
+ *
+ * 6. Check that the buffer was copied correctly.
+ *
+ * 7. Register the system allocated buffers, and repeat steps 5 and 6.
+ *
+ * Expected Results: No error status should be returned during the execution.
+ *
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * For each full-profile agent that supports kernel dispatch, copy a
+ * malloc()-ed buffer into a runtime-allocated global buffer and back into a
+ * second malloc()-ed buffer with the vector_copy kernel, first with the heap
+ * buffers unregistered and then with them registered through
+ * hsa_memory_register.
+ *
+ * Returns 0 when the end of the test is reached; failures are reported
+ * through ASSERT.
+ */
+int test_memory_allocated_vector_copy_heap() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("vector_copy.brig", &module));
+
+    // Allocate the system data buffers. Fail the test instead of
+    // dereferencing NULL if the host allocation did not succeed.
+    const uint32_t block_size = 1024 * 1024;
+    uint32_t* sysbuf_1 = (uint32_t*)malloc(sizeof(uint32_t) * block_size);
+    uint32_t* sysbuf_2 = (uint32_t*)malloc(sizeof(uint32_t) * block_size);
+    ASSERT(NULL != sysbuf_1);
+    ASSERT(NULL != sysbuf_2);
+    uint32_t* agent_buf;
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent. The for statement is the only place
+    // that advances ii, so no agent is skipped.
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Verify that the agent supports the full profile
+        hsa_profile_t profile;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (HSA_PROFILE_FULL != profile) {
+            continue;
+        }
+
+        // Check that the agent supports kernel dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Get the agent's global region, and allocate the agent buffer.
+        // A handle of (uint64_t)-1 marks "no region found".
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        ASSERT((uint64_t)-1 != global_region.handle);
+        status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**)&agent_buf);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+
+        // Get the agent's kernarg region
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+
+        // Create the queue
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 16, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        char* symbol_names[1];
+        symbol_names[0] = "&__vector_copy_kernel";
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the kernel argument buffer from the correct region
+        void* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), &kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        kernarg_vector_copy_t args;
+
+        // Initialize the data
+        uint32_t kk;
+        for (kk = 0; kk < block_size; ++kk) {
+            sysbuf_1[kk] = kk;
+        }
+
+        // Setup the kernarg: heap buffer -> agent buffer
+        args.in = sysbuf_1;
+        args.out = agent_buf;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Clear the destination block
+        memset(agent_buf, 0, sizeof(uint32_t) * block_size);
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Setup the kernarg: agent buffer -> second heap buffer
+        args.in = agent_buf;
+        args.out = sysbuf_2;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Clear the destination block
+        memset(sysbuf_2, 0, sizeof(uint32_t) * block_size);
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Verify the output data block is updated
+        for (kk = 0; kk < block_size; ++kk) {
+            ASSERT(sysbuf_2[kk] == sysbuf_1[kk]);
+        }
+
+        // -----------------------------------
+        // Repeat with registered memory.
+        // -----------------------------------
+        status = hsa_memory_register(sysbuf_1, block_size * sizeof(uint32_t));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        status = hsa_memory_register(sysbuf_2, block_size * sizeof(uint32_t));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Setup the kernarg: heap buffer -> agent buffer
+        args.in = sysbuf_1;
+        args.out = agent_buf;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Clear the destination block
+        memset(agent_buf, 0, sizeof(uint32_t) * block_size);
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Setup the kernarg: agent buffer -> second heap buffer
+        args.in = agent_buf;
+        args.out = sysbuf_2;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Clear the destination block
+        memset(sysbuf_2, 0, sizeof(uint32_t) * block_size);
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Verify the output data block is updated
+        for (kk = 0; kk < block_size; ++kk) {
+            ASSERT(sysbuf_2[kk] == sysbuf_1[kk]);
+        }
+
+        status = hsa_memory_deregister(sysbuf_1, block_size * sizeof(uint32_t));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_deregister(sysbuf_2, block_size * sizeof(uint32_t));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the kernarg memory buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the agent allocated buffer
+        status = hsa_memory_free(agent_buf);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Free the heap buffers
+    free(sysbuf_1);
+    free(sysbuf_2);
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/memory/test_memory_allocated_vector_copy_stack.c b/src/core/memory/test_memory_allocated_vector_copy_stack.c
new file mode 100644
index 0000000..a088c5a
--- /dev/null
+++ b/src/core/memory/test_memory_allocated_vector_copy_stack.c
@@ -0,0 +1,280 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_allocated_vector_copy_stack + * + * Purpose: Test that memory allocated by hsa runtime can be accessed by + * component, and that kernel can copy data from/to allocated memory + * to/from the stack, which is either registered or not. + * + * Test Description: + * + * 1. Repeat the following test for every agent that supports the full profile. + * + * 2. Allocate two memory blocks from the system's stack. + * + * 3. 
Allocate the same sized memory block using the HSA runtime APIs, allocating the
+ * memory in the global segment.
+ *
+ * 4. Initialize the first buffer to non-zero values, and the other buffers to zero.
+ *
+ * 5. Launch a kernel that copies data from the first buffer to last buffer, and then
+ * back to the second.
+ *
+ * 6. Check that the buffer was copied correctly.
+ *
+ * 7. Register the system allocated buffers, and repeat steps 5 and 6.
+ *
+ * Expected Results: No error status should be returned during the execution.
+ *
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+#include
+
+/*
+ * For each full-profile agent that supports kernel dispatch, copy a
+ * stack-resident buffer into a runtime-allocated global buffer and back into
+ * a second stack-resident buffer with the vector_copy kernel, first with the
+ * stack buffers unregistered and then with them registered through
+ * hsa_memory_register.
+ *
+ * Returns 0 when the end of the test is reached; failures are reported
+ * through ASSERT.
+ */
+int test_memory_allocated_vector_copy_stack() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("vector_copy.brig", &module));
+
+    // Allocate the system data buffers on the stack
+    const uint32_t block_size = 1024;
+    uint32_t sysbuf_1[block_size];
+    uint32_t sysbuf_2[block_size];
+    uint32_t* agent_buf;
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Repeat the test for each agent. The for statement is the only place
+    // that advances ii, so no agent is skipped.
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Verify that the agent supports the full profile
+        hsa_profile_t profile;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (HSA_PROFILE_FULL != profile) {
+            continue;
+        }
+
+        // Check that the agent supports kernel dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Get the agent's global region, and allocate the agent buffer.
+        // A handle of (uint64_t)-1 marks "no region found".
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        ASSERT((uint64_t)-1 != global_region.handle);
+        status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**)&agent_buf);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+
+        // Find a memory region that supports kernel arguments
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+
+        // Create the queue
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 16, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        char* symbol_names[1];
+        symbol_names[0] = "&__vector_copy_kernel";
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the kernel argument buffer from the correct region
+        void* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), &kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        kernarg_vector_copy_t args;
+
+        // Initialize the data
+        uint32_t kk;
+        for (kk = 0; kk < block_size; ++kk) {
+            sysbuf_1[kk] = kk;
+        }
+
+        // Setup the kernarg: stack buffer -> agent buffer
+        args.in = sysbuf_1;
+        args.out = agent_buf;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Clear the destination block
+        memset(agent_buf, 0, sizeof(uint32_t) * block_size);
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Setup the kernarg: agent buffer -> second stack buffer
+        args.in = agent_buf;
+        args.out = sysbuf_2;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Clear the destination block
+        memset(sysbuf_2, 0, sizeof(uint32_t) * block_size);
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Verify the output data block is updated
+        for (kk = 0; kk < block_size; ++kk) {
+            ASSERT(sysbuf_2[kk] == sysbuf_1[kk]);
+        }
+
+        // -----------------------------------
+        // Repeat with registered memory.
+        // -----------------------------------
+        status = hsa_memory_register(sysbuf_1, block_size * sizeof(uint32_t));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        status = hsa_memory_register(sysbuf_2, block_size * sizeof(uint32_t));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Setup the kernarg: stack buffer -> agent buffer
+        args.in = sysbuf_1;
+        args.out = agent_buf;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Clear the destination block
+        memset(agent_buf, 0, sizeof(uint32_t) * block_size);
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Setup the kernarg: agent buffer -> second stack buffer
+        args.in = agent_buf;
+        args.out = sysbuf_2;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Clear the destination block
+        memset(sysbuf_2, 0, sizeof(uint32_t) * block_size);
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Verify the output data block is updated
+        for (kk = 0; kk < block_size; ++kk) {
+            ASSERT(sysbuf_2[kk] == sysbuf_1[kk]);
+        }
+
+        status = hsa_memory_deregister(sysbuf_1, block_size * sizeof(uint32_t));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_deregister(sysbuf_2, block_size * sizeof(uint32_t));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the kernarg memory buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the agent allocated buffer
+        status = hsa_memory_free(agent_buf);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    return 0;
+}
diff --git a/src/core/memory/test_memory_assign_agent.c b/src/core/memory/test_memory_assign_agent.c
new file mode 100644
index 0000000..a66529d
--- /dev/null
+++ b/src/core/memory/test_memory_assign_agent.c
@@ -0,0 +1,270 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_assign_agent + * Purpose: Test that the hsa_memory_assign_agent() assigns a new owner + * (another agent) to a coarse-grained global buffer, and verifies that the new + * owner can access the buffer. + * + * Test Description: + * 1. Get a list of all available agents, and find the one that has a coarse- + * grained memory region. + * 2. Allocate a memory buffer on this coarse-grained region. 
This buffer is
+ * going to be accessed by other agents after the ownership of this buffer
+ * is re-assigned.
+ * 3. For each agent that supports kernel dispatch (other than the agent that
+ * already has a coarse-grained region identified), perform the following
+ * actions:
+ * 1) Assign this agent as the owner of the coarse-grained buffer by using
+ * hsa_memory_assign_agent() with Read & Write access permissions.
+ * 2) Allocate a fine-grained buffer from this agent's global region.
+ * 3) Launch the vector_copy kernel to transfer data between the coarse-
+ * grained buffer and the fine-grained buffer.
+ * 4) Verify that data are correctly transferred.
+ *
+ *
+ * Expected Results: The ownership of a coarse-grained buffer can be re-assigned
+ * to another agent, and that agent can access the buffer.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+/*
+ * Allocates a buffer in the first coarse-grained global region found, then,
+ * for every other agent that supports kernel dispatch, assigns that agent as
+ * the buffer's owner with hsa_memory_assign_agent and uses the vector_copy
+ * kernel to write to and read from the buffer through a fine-grained buffer.
+ *
+ * Returns 0 when the end of the test is reached, or when no agent exposes a
+ * coarse-grained region (nothing to test); failures are reported through
+ * ASSERT.
+ */
+int test_memory_assign_agent() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("vector_copy.brig", &module));
+
+    // Get a list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Find the agent that has a coarse-grained memory region.
+    // A handle of (uint64_t)-1 marks "not found".
+    hsa_agent_t coarse_grained_agent;
+    hsa_region_t coarse_grained_region;
+    coarse_grained_agent.handle = (uint64_t)-1;
+    coarse_grained_region.handle = (uint64_t)-1;
+
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        hsa_agent_iterate_regions(agent_list.agents[ii],
+                                  get_global_memory_region_coarse_grained, &coarse_grained_region);
+        if ((uint64_t)-1 != coarse_grained_region.handle) {
+            // Found the agent that has a coarse-grained region. Stop here:
+            // the region handle stays set, so continuing would attribute the
+            // region to every later agent as well.
+            coarse_grained_agent = agent_list.agents[ii];
+            break;
+        }
+    }
+
+    // If we didn't find the coarse-grained agent there is nothing to test;
+    // release everything acquired so far before returning.
+    if ((uint64_t)-1 == coarse_grained_agent.handle) {
+        free_agent_list(&agent_list);
+        destroy_module(module);
+        status = hsa_shut_down();
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        return 0;
+    }
+
+    // Allocate memory on the coarse-grained region
+    const int data_size = 1024;
+    const size_t buffer_byte_size = (size_t)(sizeof(uint32_t) * data_size);
+    uint32_t* coarse_grained_buffer;
+    status = hsa_memory_allocate(coarse_grained_region, buffer_byte_size, (void**)&coarse_grained_buffer);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Repeat the test for each agent that supports kernel dispatch
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Skip the agent that is the coarse-grained agent
+        if (agent_list.agents[ii].handle == coarse_grained_agent.handle) {
+            continue;
+        }
+
+        // Check if the agent supports kernel dispatch
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Find the agent's kernarg region
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t)-1 != kernarg_region.handle);
+
+        // Allocate memory on this agent's global region
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t)-1;
+        hsa_agent_iterate_regions(agent_list.agents[ii],
+                                  get_global_memory_region_fine_grained, &global_region);
+        ASSERT((uint64_t)-1 != global_region.handle);
+
+        uint32_t* fine_grained_buffer;
+        status = hsa_memory_allocate(global_region, buffer_byte_size, (void**)&fine_grained_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Assign the new owner to the coarse-grained buffer
+        status = hsa_memory_assign_agent(coarse_grained_buffer, agent_list.agents[ii], HSA_ACCESS_PERMISSION_RW);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create a kernel dispatch queue
+        hsa_queue_t* queue;
+        status = hsa_queue_create(agent_list.agents[ii], 16, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        char* symbol_names[1];
+        symbol_names[0] = "&__vector_copy_kernel";
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Setup kernel arguments
+        void* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), &kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // the kernarg data structure
+        kernarg_vector_copy_t args;
+
+        // Initialize data for write operation: fine-grained -> coarse-grained
+        int jj;
+        for (jj = 0; jj < data_size; ++jj) {
+            fine_grained_buffer[jj] = (uint32_t)jj;
+        }
+        memset(coarse_grained_buffer, 0, buffer_byte_size);
+        args.in = fine_grained_buffer;
+        args.out = coarse_grained_buffer;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, data_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Verify the coarse-grained buffer has the correct data
+        for (jj = 0; jj < data_size; ++jj) {
+            ASSERT(coarse_grained_buffer[jj] == (uint32_t)jj);
+        }
+
+        // Initialize data for read operation: coarse-grained -> fine-grained
+        memset(fine_grained_buffer, 0, buffer_byte_size);
+        args.in = coarse_grained_buffer;
+        args.out = fine_grained_buffer;
+        memcpy((void*)kernarg_buffer, &args, sizeof(args));
+
+        // Launch the vector_copy kernel
+        launch_vector_copy_kernel(queue, data_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer);
+
+        // Verify the fine-grained buffer has the correct data
+        for (jj = 0; jj < data_size; ++jj) {
+            ASSERT(fine_grained_buffer[jj] == (uint32_t)jj);
+        }
+
+        // Free the kernarg memory buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the fine-grained buffer
+        status = hsa_memory_free(fine_grained_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    status = hsa_memory_free(coarse_grained_buffer);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    free_agent_list(&agent_list);
+
+    // Destroy the loaded module
+    destroy_module(module);
+
+    // Shutdown the runtime
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/memory/test_memory_basic_allocate_free.c b/src/core/memory/test_memory_basic_allocate_free.c
new file mode 100644
index 0000000..74131a0
--- /dev/null
+++ b/src/core/memory/test_memory_basic_allocate_free.c
@@ -0,0 +1,120 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_basic_allocate_free + * + * Purpose: Test that hsa_memory_allocate and hsa_memory_free + * can be used on any region that supports memory allocation. + * + * Test Description: + * + * 1. Iterate over all of the agents in the system. + * + * 2. For each agent, iterate over all of the memory regions. + * + * 3. Query if the region supports memory allocation. + * + * 4. Call hsa_memory_allocate and make sure it succeeds. + * + * 5. 
Call hsa_memory_free and make sure it succeeds. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_basic_allocate_free() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + hsa_device_type_t agent_type; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + hsa_agent_t agent = agent_list.agents[ii]; + + // Get the list of regions + struct region_list_s region_list; + get_region_list(agent, ®ion_list); + + int jj; + for (jj = 0; jj < region_list.num_regions; ++jj) { + // Determine if allocation requests are allowed + bool allowed; + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, &allowed); + + if (!allowed) { + continue; + } + + char *buf = NULL; + status = hsa_memory_allocate(region_list.regions[jj], 1024, (void**) &buf); + ASSERT(status == HSA_STATUS_SUCCESS); + + ASSERT(buf != NULL); + + status = hsa_memory_free((void**) buf); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + free_region_list(®ion_list); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_basic_register_deregister.c b/src/core/memory/test_memory_basic_register_deregister.c new file mode 100644 index 0000000..28b8abe --- /dev/null +++ b/src/core/memory/test_memory_basic_register_deregister.c @@ -0,0 +1,85 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: basic_register_deregister + * Scope: Conformance + * + * Purpose: Test that if hsa_memory_register and hsa_memory_deregister works well. + * + * Test Description: + * + * 1. Malloc a proper block of memory using std allocation function(i.e malloc()) + * + * 2. 
After init HsaRt, call hsa_memory_register to register this block, check if it succeed. + * + * 3. Then, call hsa_memory_deregister to deregister the whole memory block from thunk, check if it succeed. + **/ + +#include +#include +#include + +int test_memory_basic_register_deregister() { + void *ptr; + ptr = malloc(512); + + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_register(ptr, 512); + ASSERT_MSG(status == HSA_STATUS_SUCCESS, "Failed to register the memory block\n"); + + status = hsa_memory_deregister(ptr, 512); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to deregister this memory block\n"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free(ptr); + + return 0; +} diff --git a/src/core/memory/test_memory_coherence_after_register.c b/src/core/memory/test_memory_coherence_after_register.c new file mode 100644 index 0000000..9b860b3 --- /dev/null +++ b/src/core/memory/test_memory_coherence_after_register.c @@ -0,0 +1,203 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_coherence_after_register + * Scope: Conformance + * + * Purpose: Test that if registered memory is coherent. + * + * Test Description: + * + * 1. Malloc a block of memory using a system allocation API (i.e malloc()). + * + * 2. Call hsa_memory_register to register this block of memory. + * + * 3. Launch a kernel to modify the values of the memory block. + * + * 4. Deregister the memory block. 
+ * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_coherence_after_register() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("vector_copy.brig", &module)); + + uint32_t block_size = 1024 * 1024; + uint32_t* data_in = (uint32_t*)malloc(sizeof(uint32_t) * block_size); + uint32_t* data_out = (uint32_t*)malloc(sizeof(uint32_t) * block_size); + + ASSERT(data_in != NULL); + ASSERT(data_out != NULL); + + status = hsa_memory_register(data_in, sizeof(uint32_t) * block_size); + ASSERT(status == HSA_STATUS_SUCCESS); + status = hsa_memory_register(data_out, sizeof(uint32_t) * block_size); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Get the list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Verify that the agent supports the full profile + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + if (HSA_PROFILE_FULL != profile) { + continue; + } + + // Verify that the agent supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 16, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // 
Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the data + memset(data_out, 0, block_size * sizeof(uint32_t)); + int kk; + for (kk = 0; kk < block_size; ++kk) { + data_in[kk] = kk; + } + + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the kernarg + kernarg_vector_copy_t args; + args.in = data_in; + args.out = data_out; + memcpy((void*)kernarg_buffer, &args, sizeof(args)); + + // Launch the vector_copy kernel + launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer); + + // Verify the output data block is updated + for (kk = 0; kk < block_size; ++kk) { + ASSERT(data_in[kk] == data_out[kk]); + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_memory_deregister(data_in, sizeof(uint32_t) * block_size); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_deregister(data_out, sizeof(uint32_t) * block_size); + ASSERT(HSA_STATUS_SUCCESS == 
status); + + free(data_out); + free(data_in); + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_concurrent_allocate.c b/src/core/memory/test_memory_concurrent_allocate.c new file mode 100644 index 0000000..a74a1f2 --- /dev/null +++ b/src/core/memory/test_memory_concurrent_allocate.c @@ -0,0 +1,193 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * + * Test Name: memory_concurrent_allocate + * + * Purpose: Test that memory can be allocated concurrently for each region + * + * Test Description: + * + * 1. Iterate over all of the agents in the system and for each agent: + * + * 2. Iterate over all of the regions associated with the agent, and for each region: + * + * 3. If the region is allocatable, launch NUM_THREADS child threads to allocate + * NUM_THREADS different memory blocks. + * + * 4. Wait in the main thread until all of the allocation threads finish. + * + * 5. Check that the returned pointers are aligned and that there is no overlap + * between any two allocated blocks. Each pointer value should + * lie between the region BASE and BASE+SIZE. + * + * 6. Free all of the memory blocks sequentially.
+ * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +#define NUM_THREADS 32 +#define MAX_ALLOC_SIZE 1024 * 1024 + +typedef struct control_block { + hsa_region_t* region; + size_t alloc_size; + void* alloc_pointer; +} cb_t; + +void test_hsa_memory_allocate_func(void *data) { + hsa_status_t status; + cb_t *cb = (cb_t*) data; + + status = hsa_memory_allocate(*(cb->region), cb->alloc_size, (void**) &(cb->alloc_pointer)); + if (status != HSA_STATUS_SUCCESS) { + cb->alloc_pointer = NULL; + } + + return; +} + +int test_memory_concurrent_allocate() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + const char *err_str; + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + int jj; + // Get the list of regions + struct region_list_s region_list; + get_region_list(agent_list.agents[ii], ®ion_list); + + for (jj = 0; jj < region_list.num_regions; ++jj) { + // Determine if memory can be allocated in this region + bool allowed; + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, &allowed); + if (!allowed) { + continue; + } + + // Get the maximum allocation size + size_t alloc_size; + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_ALLOC_MAX_SIZE, &alloc_size); + + // Adjust the size to the minimum of 1024 or max alloc size + alloc_size = (alloc_size < MAX_ALLOC_SIZE) ? 
alloc_size: MAX_ALLOC_SIZE; + + // Create a test group + struct test_group* tg_concurr_init = test_group_create(NUM_THREADS); + + // The control blocks are used to pass data to the threads + int kk; + cb_t cb[NUM_THREADS]; + for (kk = 0; kk < NUM_THREADS; kk++) { + cb[kk].region = &(region_list.regions[jj]); + cb[kk].alloc_size = alloc_size; + test_group_add(tg_concurr_init, &test_hsa_memory_allocate_func, &cb[kk], 1); + } + + // Create threads for each test + test_group_thread_create(tg_concurr_init); + + // Start to run tests + test_group_start(tg_concurr_init); + + // Wait all tests finish + test_group_wait(tg_concurr_init); + + // Exit all tests + test_group_exit(tg_concurr_init); + + // Destroy thread group and cleanup resources + test_group_destroy(tg_concurr_init); + + // Check for overlapping addresses + void *addr1, *addr2; + for (kk = 0; kk < NUM_THREADS; ++kk) { + addr1 = cb[kk].alloc_pointer; + addr2 = (void *)((int*) (addr1+alloc_size)); + ASSERT(addr1 != NULL); + int ll; + for (ll = kk+1; ll < NUM_THREADS; ++ll) { + if (addr1 < (cb[ll].alloc_pointer)) { + ASSERT(addr2 <= (cb[ll].alloc_pointer)); + } + if (addr2 > (cb[ll].alloc_pointer+alloc_size)) { + ASSERT(addr1 >= (cb[ll].alloc_pointer+alloc_size)); + } + } + } + + for (ii = 0; ii < NUM_THREADS; ii++) { + status = hsa_memory_free(cb[ii].alloc_pointer); + ASSERT(status == HSA_STATUS_SUCCESS); + } + } + + free_region_list(®ion_list); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_concurrent_deregister.c b/src/core/memory/test_memory_concurrent_deregister.c new file mode 100644 index 0000000..1fc9877 --- /dev/null +++ b/src/core/memory/test_memory_concurrent_deregister.c @@ -0,0 +1,127 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: concurrent_memory_deregister + * Purpose: Test that if memory can be deregistered concurrently. 
+ * + * Test Description: + * + * 1. Malloc a block of memory using a system allocation API (i.e. malloc()) + * + * 2. Divide this block into N segments and register the segments sequentially. + * + * 3. Launch 10 child threads to deregister the segments concurrently. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +#define NUM_THREADS 10 +#define BLOCK_SIZE 1024 + +void test_hsa_deregister_func(void *data) { + hsa_status_t status; + status = hsa_memory_deregister(data, BLOCK_SIZE * sizeof(char)); + ASSERT(status == HSA_STATUS_SUCCESS); + return; +} + +int test_memory_concurrent_deregister() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + char *ptr; + ptr = (char*) malloc(NUM_THREADS * BLOCK_SIZE * sizeof(char)); + + // Register the memory segments + int ii; + for (ii = 0; ii < NUM_THREADS; ++ii) { + status = hsa_memory_register(ptr + (ii * BLOCK_SIZE), BLOCK_SIZE * sizeof(char)); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Create a test group + struct test_group * tg_concurr_init = test_group_create(NUM_THREADS); + + // Add the tests into the test group + for (ii = 0; ii < NUM_THREADS; ii++) { + test_group_add(tg_concurr_init, &test_hsa_deregister_func, ptr + (ii * BLOCK_SIZE), 1); + } + + // Create threads for each test + test_group_thread_create(tg_concurr_init); + + // Start to run tests + test_group_start(tg_concurr_init); + + // Wait all tests finish + test_group_wait(tg_concurr_init); + + // Exit all tests + test_group_exit(tg_concurr_init); + + for (ii = 0; ii < NUM_THREADS; ii++) { + status = test_group_test_status(tg_concurr_init, ii); + ASSERT(TEST_ERROR != status); + } + + // Destroy tests, cleanup resources + test_group_destroy(tg_concurr_init); + + free(ptr); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/memory/test_memory_concurrent_free.c 
b/src/core/memory/test_memory_concurrent_free.c new file mode 100644 index 0000000..e857272 --- /dev/null +++ b/src/core/memory/test_memory_concurrent_free.c @@ -0,0 +1,167 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * + * Test Name: concurrent_memory_free + * + * Purpose: Test that memory can be freed concurrently for each region + * + * Test Description: + * + * 1. Iterate over all of the agents in the system and for each agent: + * + * 2. Iterate over all of the regions associated with the agent, and for each region: + * + * 3. If the region is allocatable, allocate N regions. + * + * 4. Launch N child threads to free the memory blocks concurrently. + * + * 5. Wait in the main thread until all of the threads finish + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +#define NUM_THREADS 32 + +typedef struct control_block { + hsa_region_t* region; + size_t alloc_size; + void* alloc_pointer; +} cb_t; + +void test_hsa_memory_free_func(void *data) { + hsa_status_t status; + cb_t *cb = (cb_t*) data; + + status = hsa_memory_free(cb->alloc_pointer); + if (status != HSA_STATUS_SUCCESS) { + cb->alloc_pointer = NULL; + } + + return; +} + +int test_memory_concurrent_free() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + const char *err_str; + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + int jj; + // Get the list of regions + struct region_list_s region_list; + get_region_list(agent_list.agents[ii], ®ion_list); + + for (jj = 0; jj < region_list.num_regions; ++jj) { + // Determine if memory can be allocated in this region + bool allowed; + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, &allowed); + if (!allowed) { + continue; + } + + // Get the maximum 
allocation size + size_t alloc_size; + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_ALLOC_MAX_SIZE, &alloc_size); + + // Adjust the size to the minimum of 1024 or max alloc size + alloc_size = (alloc_size < 1024) ? alloc_size: 1024; + + // Create a test group + struct test_group* tg_concurr_init = test_group_create(NUM_THREADS); + + // The control blocks are used to pass data to the threads + int kk; + cb_t cb[NUM_THREADS]; + for (kk = 0; kk < NUM_THREADS; kk++) { + cb[kk].region = &(region_list.regions[jj]); + cb[kk].alloc_size = alloc_size; + status = hsa_memory_allocate(*(cb[kk].region), cb[kk].alloc_size, &(cb[kk].alloc_pointer)); + ASSERT(HSA_STATUS_SUCCESS == status); + test_group_add(tg_concurr_init, &test_hsa_memory_free_func, &cb[kk], 1); + } + + // Create threads for each test + test_group_thread_create(tg_concurr_init); + + // Start to run tests + test_group_start(tg_concurr_init); + + // Wait all tests finish + test_group_wait(tg_concurr_init); + + // Exit all tests + test_group_exit(tg_concurr_init); + + // Destroy thread group and cleanup resources + test_group_destroy(tg_concurr_init); + } + + free_region_list(®ion_list); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_concurrent_register.c b/src/core/memory/test_memory_concurrent_register.c new file mode 100644 index 0000000..fbb409a --- /dev/null +++ b/src/core/memory/test_memory_concurrent_register.c @@ -0,0 +1,123 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: concurrent_memory_register + * Purpose: Test that if memory can be registered concurrently. + * + * Test Description: + * + * 1. Malloc a block of memory using a system allocation API (i.e. malloc()) + * + * 2. Divide this block into N segments and register the segments on different + * threads. + * + * 3. 
Deregister the segments sequentially. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +#define NUM_THREADS 10 +#define BLOCK_SIZE 1024 + +void test_hsa_register_func(void *data) { + hsa_status_t status; + status = hsa_memory_register(data, BLOCK_SIZE * sizeof(char)); + ASSERT(status == HSA_STATUS_SUCCESS); + return; +} + +int test_memory_concurrent_register() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + char *ptr; + ptr = (char*) malloc(NUM_THREADS * BLOCK_SIZE * sizeof(char)); + + // Create a test group + struct test_group * tg_concurr_init = test_group_create(NUM_THREADS); + + // Add the tests into the test group + int ii; + for (ii = 0; ii < NUM_THREADS; ii++) { + test_group_add(tg_concurr_init, &test_hsa_register_func, ptr + (ii * BLOCK_SIZE), 1); + } + + // Create threads for each test + test_group_thread_create(tg_concurr_init); + + // Start to run tests + test_group_start(tg_concurr_init); + + // Wait all tests finish + test_group_wait(tg_concurr_init); + + // Exit all tests + test_group_exit(tg_concurr_init); + + // Destroy tests, cleanup resources + test_group_destroy(tg_concurr_init); + + // Deregister the memory segments + for (ii = 0; ii < NUM_THREADS; ++ii) { + status = hsa_memory_deregister(ptr + (ii * BLOCK_SIZE), BLOCK_SIZE * sizeof(char)); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(ptr); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/memory/test_memory_copy_allocated_to_allocated.c b/src/core/memory/test_memory_copy_allocated_to_allocated.c new file mode 100644 index 0000000..f5f45d8 --- /dev/null +++ b/src/core/memory/test_memory_copy_allocated_to_allocated.c @@ -0,0 +1,130 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: memory_copy_allocated_to_allocated + * + * Purpose: Tests that the hsa_memory_copy API can copy data between memory + * allocated by the hsa runtime allocator API's. + * + * Test Description: + * + * 1. Iterate through all of the agents in the system, and for each agent: + * 2. Find a region in the agent's global segment and use hsa_memory_allocate + * to allocate two buffers, one denoted as the destination buffer and + * the other the source buffer. + * 3. For all pairs of source and destination buffers, initialize both + * the source and destination buffers to different values. + * 4. Use hsa_memory_copy to copy source to destination. + * + * Expected Results: For all buffer pairs, the data from the source should be + * successfully copied to the destination. + * + */ + +#include +#include +#include + +int test_memory_copy_allocated_to_allocated() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if this agent has a global memory region + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region, &global_region); + if ((uint64_t)-1 != global_region.handle) { + continue; + } + + // Allocate buffer on the global memory segment + size_t block_size = 1024; + uint32_t* src_buffer; + uint32_t* dst_buffer; + status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &src_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &dst_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the buffer + int kk; + for (kk = 0; kk < block_size; ++kk) { + src_buffer[kk] = kk; + dst_buffer[kk] = 0; + } + + 
hsa_memory_copy(dst_buffer, src_buffer, sizeof(uint32_t) * block_size); + + // Verify data are successfully copied + for (kk = 0; kk < block_size; ++kk) { + if (dst_buffer[kk] != src_buffer[kk]) { + ASSERT(0); + } + } + + // Free the buffer + status = hsa_memory_free(src_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(dst_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_copy_allocated_to_registered.c b/src/core/memory/test_memory_copy_allocated_to_registered.c new file mode 100644 index 0000000..3fb12d7 --- /dev/null +++ b/src/core/memory/test_memory_copy_allocated_to_registered.c @@ -0,0 +1,142 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Purpose: Tests that the hsa_memory_copy API can copy data between memory + * allocated by the hsa runtime allocator APIs and registered memory. + * + * Test Description: + * + * 1. Iterate through all of the agents in the system, and for each agent: + * 2. Find a region in the agent's global segment and use hsa_memory_allocate + * to allocate one buffer, denoted as the source buffer. + * 3. Allocate memory from the system, and register it with the hsa_memory_register + * API, and denote it as the destination. + * 4. For all source buffers, initialize both the source and destination + * buffers to different values and use hsa_memory_copy to copy source to destination. + * + * Expected Results: For all buffer copies, the data from the source should be + * successfully copied to the destination.
+ * + */ + +#include +#include +#include +#include + +int test_memory_copy_allocated_to_registered() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Verify that the agent supports the full profile + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + if (HSA_PROFILE_FULL != profile) { + continue; + } + + // Check if this agent has a global memory region + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region, &global_region); + if ((uint64_t)-1 != global_region.handle) { + continue; + } + + // Allocate the source buffer on the global segment + size_t data_size = 1024; + uint32_t* src_buffer; + status = hsa_memory_allocate(global_region, data_size * sizeof(uint32_t), (void*) &src_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the destination buffer on system memory + uint32_t* dst_buffer; + dst_buffer = (uint32_t*)malloc(data_size * sizeof(uint32_t)); + // Register the destination buffer + status = hsa_memory_register(dst_buffer, data_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the buffer + int kk; + for (kk = 0; kk < data_size; ++kk) { + src_buffer[kk] = kk; + dst_buffer[kk] = 0; + } + + hsa_memory_copy(dst_buffer, src_buffer, sizeof(uint32_t) * data_size); + + // Verify data are successfully copied + for (kk = 0; kk < data_size; ++kk) { + if (dst_buffer[kk] != src_buffer[kk]) { + ASSERT(0); + } + } + + // Free the buffer + status = hsa_memory_free(src_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_deregister(dst_buffer, data_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + 
free(dst_buffer); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_copy_registered_to_allocated.c b/src/core/memory/test_memory_copy_registered_to_allocated.c new file mode 100644 index 0000000..0b87b4f --- /dev/null +++ b/src/core/memory/test_memory_copy_registered_to_allocated.c @@ -0,0 +1,142 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Purpose: Tests that the hsa_memory_copy API can copy data between memory + * allocated by the hsa runtime allocator APIs and registered memory. + * + * Test Description: + * + * 1. Iterate through all of the agents in the system, and for each agent: + * 2. Find a region in the agent's global segment and use hsa_memory_allocate + * to allocate one buffer, denoted as the destination buffer. + * 3. Allocate memory from the system, and register it with the hsa_memory_register + * API, and denote it as the source. + * 4. For all source buffers, initialize both the source and destination + * buffers to different values and use hsa_memory_copy to copy source to destination. + * + * Expected Results: For all buffer copies, the data from the source should be + * successfully copied to the destination.
+ * + */ + +#include +#include +#include +#include + +int test_memory_copy_registered_to_allocated() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Verify that the agent supports the full profile + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + if (HSA_PROFILE_FULL != profile) { + continue; + } + + // Check if this agent has a global memory region + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + continue; + } + + // Allocate the destination buffer on the global segment + size_t data_size = 1024; + uint32_t* dst_buffer; + status = hsa_memory_allocate(global_region, data_size * sizeof(uint32_t), (void*) &dst_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the destination buffer on system memory + uint32_t* src_buffer; + src_buffer = (uint32_t*)malloc(data_size * sizeof(uint32_t)); + // Register the destination buffer + status = hsa_memory_register(src_buffer, data_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the buffer + int kk; + for (kk = 0; kk < data_size; ++kk) { + src_buffer[kk] = kk; + dst_buffer[kk] = 0; + } + + hsa_memory_copy(dst_buffer, src_buffer, sizeof(uint32_t) * data_size); + + // Verify data are successfully copied + for (kk = 0; kk < data_size; ++kk) { + if (dst_buffer[kk] != src_buffer[kk]) { + ASSERT(0); + } + } + + // Free the buffer + status = hsa_memory_free(dst_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_deregister(src_buffer, data_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == 
status); + free(src_buffer); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_copy_registered_to_registered.c b/src/core/memory/test_memory_copy_registered_to_registered.c new file mode 100644 index 0000000..6446d55 --- /dev/null +++ b/src/core/memory/test_memory_copy_registered_to_registered.c @@ -0,0 +1,118 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_copy_registered_to_registered + * + * Purpose: Tests that the hsa_memory_copy API can copy data between memory + * registered by the hsa runtime registration API's. + * + * Test Description: + * + * 1. Allocate two buffers from system memory and register them using the + * hsa_memory_register API. Denote one as the source buffer and the + * other as the destination buffer. + * 2. Initialize both buffers to different values. + * 3. Use the hsa_memory_copy API to copy the source values to the + * destination buffer. + * + * Expected Results: The data from the source should be + * successfully copied to the destination. 
+ * + */ + +#include +#include +#include + +int test_memory_copy_registered_to_registered() { + hsa_status_t status; + const uint32_t block_size = 1024; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t *src_buffer = (uint32_t *)malloc(block_size* sizeof(uint32_t)); + ASSERT(src_buffer != NULL); + + uint32_t *dst_buffer = (uint32_t *)malloc(block_size* sizeof(uint32_t)); + ASSERT(src_buffer != NULL); + + // Register the memory + status = hsa_memory_register(src_buffer, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_register(dst_buffer, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + int kk; + for (kk = 0; kk < block_size; ++kk) { + src_buffer[kk] = kk; + } + memset(dst_buffer, 0, sizeof(uint32_t) * block_size); + + status = hsa_memory_copy(dst_buffer, src_buffer, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + for (kk = 0; kk < block_size; ++kk) { + ASSERT(dst_buffer[kk] == kk); + } + + // Deregister the memory + status = hsa_memory_deregister(src_buffer, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_deregister(dst_buffer, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + free(src_buffer); + free(dst_buffer); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/memory/test_memory_copy_system_and_global.c b/src/core/memory/test_memory_copy_system_and_global.c new file mode 100644 index 0000000..59a92d7 --- /dev/null +++ b/src/core/memory/test_memory_copy_system_and_global.c @@ -0,0 +1,162 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * 
Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_copy_system_and_global + * Purpose: Test that the hsa_memory_copy() can copy data between system memory + * and global memory. + * + * Test Description: + * + * 1. Initialize hsa runtime by calling hsa_init(); + * 2. 
Declare two system memory blocks, block_s1 and block_s2; + * 3. Allocate two memory blocks on the global region of an agent that supports + * kernel dispatch, block_g1 and block_g2; + * 4. Initialize block_s1 with non zero value and block_s2 with zero value; + * 5. Initialize block_g1 with non zero value and block_g2 with zero value; + * 6. Use hsa_memory_copy() to copy data + * 1) from block_s1 to block_s2; + * 2) from block_g1 to block_g2; + * 3) from block_s1 to block_g2; + * 4) from block_g1 to block_s2; + * 7. Verify data have been successfully copied in each of sub-steps in 6 + * 8. Shut down hsa runtime. + + * Expected Results: No error status is returned during the process and the + * value copied is always correct. + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_copy_system_and_global() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if this agent supports kernel dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Allocate system memory blocks + const int block_size = 4096; + int* block_s1 = (int*)malloc(sizeof(int) * block_size); + int* block_s2 = (int*)malloc(sizeof(int) * block_size); + + // Find the global region for this agent + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + ASSERT((uint64_t)-1 != global_region.handle); + + // Allocate memory blocks on the global region + int* block_g1; + int* block_g2; + status = 
hsa_memory_allocate(global_region, sizeof(int) * block_size, (void**) &block_g1); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(int) * block_size, (void**) &block_g2); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the values in the memory blocks + int jj; + for (jj = 0; jj < block_size; ++jj) { + block_s1[jj] = jj; + block_g1[jj] = jj; + } + + // 6. + int* src[4] = {block_s1, block_g1, block_s1, block_g1}; + int* dest[4] = {block_s2, block_g2, block_g2, block_s2}; + for (jj = 0; jj < 4; ++jj) { + // Clear the destination memory block + memset(dest[jj], 0, sizeof(int) * block_size); + + // Apply the memory copy + status = hsa_memory_copy(dest[jj], src[jj], sizeof(int) * block_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the copied data are correct + int kk; + for (kk == 0; kk < block_size; ++kk) { + if (kk != dest[jj][kk]) { + // Data inconsistency occured + ASSERT(0); + } + } + } + + // Free the memory blocks + status = hsa_memory_free(block_g1); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(block_g2); + ASSERT(HSA_STATUS_SUCCESS == status); + free(block_s1); + free(block_s2); + } + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_device.c b/src/core/memory/test_memory_device.c new file mode 100644 index 0000000..da379d1 --- /dev/null +++ b/src/core/memory/test_memory_device.c @@ -0,0 +1,221 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_device + * Purpose: Verifies that a kernel can access device memory successfully + * + * Test Description: + * 1. Initialize hsa runtime by calling hsa_init(); + * 2. Iterate all of the agents in the system and find a GPU device; + * 3. Iterate all of the regions for this GPU to find a device memory region; + * 4. 
Allocate a memory block in this region, and launch a kernel using this block as its input; + * 5. In the kernel, try to write something to this memory block. + * + * Expected Results: The kernel should be able to access the device memory without error. + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_device() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("init_data.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the agent is a GPU agent + hsa_device_type_t device_type; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEVICE, &device_type); + ASSERT(HSA_STATUS_SUCCESS == status); + if (HSA_DEVICE_TYPE_GPU != device_type) { + // continue if this agent is not a GPU agent + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create the queue + const uint32_t queue_size = 256; + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + 
HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__init_int_data_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_init_data_t), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of all regions available to this agent + struct region_list_s region_list; + get_region_list(agent_list.agents[ii], ®ion_list); + + // For each of the region, get region info + int jj; + for (jj = 0; jj < region_list.num_regions; ++jj) { + // Get the region info + region_info_t info; + get_region_info(region_list.regions[jj], &info); + + // Verify this is the device memory: + // size > 0, alloc_max_size > 0 + if (info.size > 0 && info.alloc_max_size > 0) { + // Allocate on this region + const size_t data_size = 1024; + int* data; + status = hsa_memory_allocate(region_list.regions[jj], data_size, (void*) &data); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_dim3_t grid_dim; + hsa_dim3_t workgroup_dim; + + int dim; + dim = 2; + // Consistent with data_size + workgroup_dim.x = 256; + workgroup_dim.y = 4; + workgroup_dim.z = 1; + grid_dim.x = workgroup_dim.x; + grid_dim.y = workgroup_dim.y; + grid_dim.z = workgroup_dim.z; + const int value = 0xcccccccc; + launch_init_data_kernel(queue, + data, data_size, value, dim, + grid_dim, workgroup_dim, + (uint64_t)(symbol_record.kernel_object), + (void*)kernarg_buffer); + // Verify the kernel was executed correctly + int kk; + for (kk = 0; kk < data_size; ++kk) { + if (data[kk] != value) { + ASSERT(0); + } + } 
+ + status = hsa_memory_free(data); + ASSERT(HSA_STATUS_SUCCESS == status); + } + } + + // Free the region list + if (region_list.num_regions > 0) { + free(region_list.regions); + } + + // Free the kernarg_buffer that was allocated on kernarg_region + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy queues + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_group_dynamic_allocation.c b/src/core/memory/test_memory_group_dynamic_allocation.c new file mode 100644 index 0000000..234fc5f --- /dev/null +++ b/src/core/memory/test_memory_group_dynamic_allocation.c @@ -0,0 +1,259 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_group_dynamic_allocation + * Purpose: Verify that group memory can be dynamically allocated and used in + * a kernel. + * + * Test Description: + * 1) Generate a list of agents that support dispatch. + * 2) For each agent, load and finalize the group_memory_dynamic kernel. + * 3) Create suitable memory buffers to correctly execute the + * group_memory_dynamic kernel. 
+ * 4) Execute the kernel on the target agent, and wait for the execution to + * complete. + * + * Expected Results: The kernel should execute successfully, and the memory + * should be modified as expected. + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_group_dynamic_allocation() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("group_memory.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get the agent's global region and allocate input and output buffers + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + ASSERT((uint64_t)-1 != global_region.handle); + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 16, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t 
executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__group_memory_dynamic_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the data block to be used by the kernel + // The size of the data block must be able to fit into one workgroup + const uint32_t num_workitems = 256; + const uint32_t num_workgroups = 4; + const uint32_t group_memory_size = 1024; + + uint32_t block_size = num_workitems * num_workgroups; + uint32_t* data_in; + uint32_t* data_out; + status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_in); + status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_out); + + // Initialize the data + memset(data_out, 0, sizeof(uint32_t) * block_size); + int kk; + for (kk = 0; kk < block_size; ++kk) { + data_in[kk] = kk; + } + + // Calculate the offset of dynamic group memory (or, the size of static + // group memory) + // The dynamic group memory starts right after the static group memory, + // if the kernel defines one. 
+ uint32_t grp_offset = symbol_record.group_segment_size; + + // The total byte size of group memory, static + dynamic + uint32_t total_grp_byte_sizse = symbol_record.group_segment_size + group_memory_size * sizeof(uint32_t); + + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_group_memory_dynamic_alloc_t), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the kernarg + kernarg_group_memory_dynamic_alloc_t args; + args.data_in = data_in; + args.data_out = data_out; + args.grp_offset = (uint32_t)grp_offset; + args.count = group_memory_size; + memcpy((void*)kernarg_buffer, &args, sizeof(args)); + + // Create a signal with initial value of 1 + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Enqueue the dispatch packet + uint64_t packet_id = hsa_queue_add_write_index_acquire(queue, 1); + + while (packet_id - hsa_queue_load_read_index_relaxed(queue) >= queue->size) {} + + hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)queue->base_address + + packet_id % queue->size; + + memset(dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet->completion_signal = signal; + dispatch_packet->setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = (uint16_t)num_workitems; + dispatch_packet->workgroup_size_y = (uint16_t)1; + dispatch_packet->workgroup_size_z = (uint16_t)1; + dispatch_packet->grid_size_x = (uint32_t)block_size; + dispatch_packet->grid_size_y = 1; + dispatch_packet->grid_size_z = 1; + dispatch_packet->kernel_object = (uint64_t) symbol_record.kernel_object; + dispatch_packet->kernarg_address = (void*) kernarg_buffer; + dispatch_packet->group_segment_size = total_grp_byte_sizse; + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + 
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + + __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE); + + // Ring the signal door bell to launch the packet + hsa_signal_store_release(queue->doorbell_signal, packet_id); + + // Wait until the kernel complete + while (0 != hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {} + + hsa_signal_destroy(signal); + + // Validate the kernel was executed correctly + for (kk = 0; kk < block_size; ++kk) { + ASSERT(data_out[kk] == data_in[kk]); + } + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the data buffers + status = hsa_memory_free(data_in); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(data_out); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_minimum_region.c b/src/core/memory/test_memory_minimum_region.c new file mode 100644 index 0000000..13dc050 --- /dev/null +++ b/src/core/memory/test_memory_minimum_region.c @@ -0,0 +1,130 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + 
* Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_minimum_region + * Purpose: Test that the system includes at least one system global region + * and one KERNARG region and that the size is the same for the different + * agents. 
+ * + * Test Description: + * 1. Iterate all of the agents in the system, and for each agent: + * 2. Iterate all of the regions associated with the agent, and for each + * region: + * 3. Get the region the region's segment and flag INFO. + * 4. Check that a primary coherent memory is listed for each agent, and + * that the size is the same across different agents. + * 5. Check that is a KERNARG segment is included, the size is the same + * across different agents. + * + * Expected Results: All agents should have a system global memory region, + * and the size should be the same for all of them. If they have a KERNARG + * region, the size should be the same for all of them. + * + */ + +#include +#include +#include +#include +#include + +int test_memory_minimum_region() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // An array of size_t to record the sizes of "minimum" global region + // on each agent + size_t* global_region_sizes = (size_t*)malloc(agent_list.num_agents * sizeof(size_t)); + memset(global_region_sizes, 0, agent_list.num_agents * sizeof(size_t)); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Get a list of all regions available to this agent + struct region_list_s region_list; + get_region_list(agent_list.agents[ii], ®ion_list); + + // For each of the regions, get the region's info + int jj; + size_t size; + hsa_region_segment_t segment; + for (jj = 0; jj < region_list.num_regions; ++jj) { + // Get the region size + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_SEGMENT, &segment); + if (HSA_REGION_SEGMENT_GLOBAL == segment) { + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_SIZE, &size); + global_region_sizes[ii] = size; + } + } + + // Free the region list + if (region_list.num_regions > 0) { + 
free(region_list.regions); + } + } + + // Verify the sizes of global region of each agent is the same + for (ii = 1; ii < agent_list.num_agents; ++ii) { + if (global_region_sizes[ii] != global_region_sizes[0]) { + ASSERT(0); + } + } + + free(global_region_sizes); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_region_alignment.c b/src/core/memory/test_memory_region_alignment.c new file mode 100644 index 0000000..28d5469 --- /dev/null +++ b/src/core/memory/test_memory_region_alignment.c @@ -0,0 +1,113 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: region_alignment + * + * Purpose: Test whether each region's attribute information follows + * the constraints, and whether the information is consistent across multiple threads. + * + * Test Description: + * 1) For each agent: + * 2) For each memory region of the agent that has HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED set, allocate memory and make sure that: + * 3) It is aligned as specified by HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT. + * 4) The alignment attribute is a power of 2. + * + * Expected Results: The queried alignment size should be consistent + * with the value specified by HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT. 
+ */ + +#include +#include +#include +#include + +int test_memory_region_alignment() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Get a list of all regions available to this agent + struct region_list_s region_list; + get_region_list(agent_list.agents[ii], ®ion_list); + + // For each of the region, get region info + int jj; + for (jj = 0; jj < region_list.num_regions; ++jj) { + // Find the max_size defined by the region + size_t max_size; + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_ALLOC_MAX_SIZE, &max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the region that has HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT + size_t alignment_size; + status = hsa_region_get_info(region_list.regions[jj], HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT, &alignment_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify the alignment attribute is a power of 2 + if (max_size == 0) { + ASSERT(alignment_size == 0); + } else { + ASSERT(alignment_size&&(!(alignment_size&(alignment_size-1)))); + } + } + // Free the region list + free_region_list(®ion_list); + } + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_region_concurrent_get_info.c b/src/core/memory/test_memory_region_concurrent_get_info.c new file mode 100644 index 0000000..7cfc7bb --- /dev/null +++ b/src/core/memory/test_memory_region_concurrent_get_info.c @@ -0,0 +1,192 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + 
* + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: region_info_check_concurrency + * + * Purpose: Test whether each region's attribute information follows + * the constraints, and whether the information is consistent across multiple threads. + * + * Test Description: + * 1. 
Initialize the runtime by calling hsa_init. + * 2. Iterate all of the agents in the system, and for each agent: + * 3. Iterate all of the regions associated with agent, and for each region: + * 4. Get attribute info, check if the results are following the constraints, + * and store these attribute information. + * 5. Launch several threads to query region information again, and compare + * the concurrently queried information with the data generated by the main + * thread. + * + * Expected Results: The concurrently queried information should be consistent + * with the information queried by the main thread. + */ + +#include +#include +#include +#include + +typedef struct region_info_s { + hsa_region_segment_t segment; + hsa_region_global_flag_t flags; + size_t size; + size_t alloc_max_size; + bool alloc_allowed; + size_t alloc_granule; + size_t alloc_alignemnt; +} region_info_t; + +// Get memory region info +void get_region_info(hsa_region_t region, region_info_t* info) { + hsa_status_t status; + + status = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &info->segment); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &info->flags); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &info->size); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_region_get_info(region, HSA_REGION_INFO_ALLOC_MAX_SIZE, &info->alloc_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_region_get_info(region, HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, &info->alloc_allowed); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_region_get_info(region, HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE, &info->alloc_granule); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_region_get_info(region, HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT, &info->alloc_alignemnt); + ASSERT(HSA_STATUS_SUCCESS == status); +} + +typedef struct thread_data_get_region_info_s { 
+ // The current region + hsa_region_t region; + // The region info retrieved from main thread + region_info_t* info; + // Consistency check result + int consistency; +} thread_data_get_region_info_t; + +void thread_proc_get_region_info(void* data) { + thread_data_get_region_info_t* thread_data = (thread_data_get_region_info_t*) data; + + region_info_t info; + memset(&info, 0, sizeof(region_info_t)); + get_region_info(thread_data->region, &info); + + if (0 == memcmp(thread_data->info, &info, sizeof(region_info_t))) { + // The region info is consistent with the one got from the main thread + thread_data->consistency = 1; + } else { + thread_data->consistency = 0; + } +} + +int test_memory_region_concurrent_get_info() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Get a list of all regions available to this agent + struct region_list_s region_list; + get_region_list(agent_list.agents[ii], ®ion_list); + + // For each of the region, get region info + int jj; + for (jj = 0; jj < region_list.num_regions; ++jj) { + // Get the region info + region_info_t info; + memset(&info, 0, sizeof(region_info_t)); + get_region_info(region_list.regions[jj], &info); + + // Prepare thread data + const int num_threads = 10; + thread_data_get_region_info_t thread_data[num_threads]; + int kk; + for (kk = 0; kk < num_threads; ++kk) { + thread_data[kk].region = region_list.regions[jj]; + thread_data[kk].info = &info; + thread_data[kk].consistency = 0; + } + + // Launch threads to get the region info concurrently + struct test_group* tg_region_info = test_group_create(num_threads); + for (kk = 0; kk < num_threads; ++kk) { + test_group_add(tg_region_info, &thread_proc_get_region_info, thread_data + kk, 1); + } + test_group_thread_create(tg_region_info); + 
test_group_start(tg_region_info); + test_group_wait(tg_region_info); + test_group_exit(tg_region_info); + test_group_destroy(tg_region_info); + + // Verify region info is consistent among all threads + for (kk = 0; kk < num_threads; ++kk) { + ASSERT(1 == thread_data[kk].consistency); + } + } + + // Free the region list + free_region_list(®ion_list); + } + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_register_subrange.c b/src/core/memory/test_memory_register_subrange.c new file mode 100644 index 0000000..eac91ad --- /dev/null +++ b/src/core/memory/test_memory_register_subrange.c @@ -0,0 +1,223 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_register_subrange + * Scope: Conformance + * + * Purpose: Test that a whole memory block can be registered separately. + * + * Test Description: + * 1. Malloc a block of memory using std allocation function(i.e malloc()) + * 2. Divide this block into several sub-range. + * 3. Call hsa_memory_register on each of the sub-blocks. + * 4. Execute vector_copy on each sub-block, and ensure that the kernel executes correctly. + * 5. Deregister each sub-block. 
+ * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_register_subrange() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("vector_copy.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Verify that the agent supports the full profile + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + if (HSA_PROFILE_FULL != profile) { + continue; + } + + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Continue if this agent does not support DISPATCH + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); 
+ + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the kernel argument buffer from the correct region + kernarg_vector_copy_t* kernarg_buffer; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), (void*) &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + const size_t subrange_size = 1024 * sizeof(int32_t); + const size_t num_subranges = 128; + size_t data_size = subrange_size * num_subranges; + size_t data_size_in_bytes = sizeof(int) * data_size; + int32_t* data = (int*) malloc(data_size_in_bytes); + int32_t* sub_data[num_subranges]; + + int jj; + // Set up pointers of each subrange + for (jj = 0; jj < num_subranges; ++jj) { + sub_data[jj] = data + jj * subrange_size; + } + + // Register all the subranges + for (jj = 0; jj < num_subranges; ++jj) { + status = hsa_memory_register(sub_data[jj], subrange_size); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Launch the init_data kernel on each subrange data + int32_t values[1024]; + + for (jj = 0; jj < num_subranges; ++jj) { + int kk; + for (kk = 0; kk < 1024; ++kk) { + values[kk] = jj; + } + + // Fill in the kernel arguments + kernarg_buffer->in = (void*) values; + kernarg_buffer->out = (void*) sub_data[jj]; + launch_vector_copy_kernel(queue, + 1024, + (uint64_t)(symbol_record.kernel_object), + (void*) kernarg_buffer); + + // Verify the kernel was executed correctly + for (kk = 0; kk < 1024; ++kk) { + if (sub_data[jj][kk] != jj) { + ASSERT(0); + } + } + } + + // Deregister all the subranges + for (jj = 0; jj < num_subranges; ++jj) { + status = hsa_memory_deregister(sub_data[jj], subrange_size); + ASSERT(HSA_STATUS_SUCCESS == 
status); + } + + free(data); + + // Free the kernarg_buffer that was allocated on kernarg_region + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy queues + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_vector_copy_between_stack_and_heap.c b/src/core/memory/test_memory_vector_copy_between_stack_and_heap.c new file mode 100644 index 0000000..49ec5db --- /dev/null +++ b/src/core/memory/test_memory_vector_copy_between_stack_and_heap.c @@ -0,0 +1,289 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_vector_copy_between_stack_and_heap + * Purpose: Test that registered/non-registered stack memory can be copied + * to/from registered/non-registered heap memory. + * + * Test Description: + * + * 2. Allocate a memory block using system allocation API like malloc(). + * 3. Declare the same size block on the stack. + * 4. Set all of the stack values to non zero value, and the heap value + * to zero value. + * 5. 
Register both sets of memory with HSA. + * 6. Launch a kernel to copy data from stack to the heap. + * 7. Set stack values to zero and launch a kernel to copy data + the heap to the stack + * 8. Deregister block #2 and repeat step 4. Register block #1, launch a kernel + * to copy data from block #2 to block #1, after kernel finishes, check + * the correctness of copying. + * 9. Set all of the value in block#2 to zero, launch a kernel to copy data + * from block #1 to block #2, check the correctness of copying. + * 10. Deregister block #1, and shut down hsa runtime; + * + * Expected results: No error code should be returned during the process and + * value copied must be as the same the source. + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_vector_copy_between_stack_and_heap() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("vector_copy.brig", &module)); + + // Get the list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Verify that the agent supports the full profile + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + if (HSA_PROFILE_FULL != profile) { + continue; + } + + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + 
hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the data block to be used by the kernel. + const uint32_t block_size = 1024; + uint32_t* heap_block = (uint32_t*)malloc(sizeof(uint32_t) * block_size); + uint32_t stack_block[block_size]; + + // Initialize the data + memset(heap_block, 0, block_size * sizeof(uint32_t)); + int kk; + for (kk = 0; kk < block_size; ++kk) { + stack_block[kk] = kk; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // The kernarg data structure + kernarg_vector_copy_t args; + + // ----------------------------------- + // Step 5, 6: + // Register the block #2 only. 
+ // Launch the kernel to copy stack_block --> heap_block + // ----------------------------------- + status = hsa_memory_register(stack_block, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the kernarg + args.in = stack_block; + args.out = heap_block; + memcpy((void*)kernarg_buffer, &args, sizeof(args)); + + // Launch the vector_copy kernel + launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*) kernarg_buffer); + + // Verify the output data block is updated + for (kk = 0; kk < block_size; ++kk) { + ASSERT(heap_block[kk] == stack_block[kk]); + } + + // ---------------------------------------------- + // Step 7: + // Launch the kernel to copy heap_block --> stack_block + // ---------------------------------------------- + args.in = heap_block; + args.out = stack_block; + memcpy((void*)kernarg_buffer, &args, sizeof(args)); + + // Clear the destination block + memset(stack_block, 0, sizeof(uint32_t) * block_size); + + // Launch the vector_copy kernel + launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*) kernarg_buffer); + + // Verify the output data block is updated + for (kk = 0; kk < block_size; ++kk) { + ASSERT(heap_block[kk] == stack_block[kk]); + } + + // ----------------------------------------------- + // Step 8: + // Deregister stack_block, register heap_block, then copy stack_block --> heap_block + // ----------------------------------------------- + status = hsa_memory_register(heap_block, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_deregister(stack_block, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + args.in = stack_block; + args.out = heap_block; + memcpy((void*)kernarg_buffer, &args, sizeof(args)); + + // Clear the destination block + memset(heap_block, 0, sizeof(uint32_t) * block_size); + + // Launch the vector_copy kernel + launch_vector_copy_kernel(queue, 
block_size, (uint64_t)symbol_record.kernel_object, (void*) kernarg_buffer); + + // Verify the output data block is updated + for (kk = 0; kk < block_size; ++kk) { + ASSERT(heap_block[kk] == stack_block[kk]); + } + + // ----------------------------------------------- + // Step 9: + // Clear stack_block, then copy heap_block --> stack_block + // ----------------------------------------------- + args.in = heap_block; + args.out = stack_block; + memcpy((void*)kernarg_buffer, &args, sizeof(args)); + + // Clear the destination block + memset(stack_block, 0, sizeof(uint32_t) * block_size); + + // Launch the vector_copy kernel + launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*) kernarg_buffer); + + // Verify the output data block is updated + for (kk = 0; kk < block_size; ++kk) { + if (heap_block[kk] != stack_block[kk]) { + ASSERT(0); + } + } + + // Deregister the heap_block + status = hsa_memory_deregister(heap_block, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + free(heap_block); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_vector_copy_heap_not_registered.c b/src/core/memory/test_memory_vector_copy_heap_not_registered.c new file mode 100644 index 0000000..000729a --- /dev/null +++ b/src/core/memory/test_memory_vector_copy_heap_not_registered.c @@ 
-0,0 +1,208 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_vector_copy_heap_not_registered + * Purpose: Test that non registered heap memory can be accessed by an agent if + * it supports the full profile, and that a kernel can copy data from/to another place. + * + * Test Description: + * 1. Allocate two memory blocks of the same size using system allocation APIs like malloc. + * 2. Initialize the first block with non zero values and the second block with zero values. + * 3. Launch a kernel to copy data from the first block to the second. + * 4. Repeat step 3 for several times. + * + * Expected Results: No error status is returned during the process and the values are copied + * correctly. + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_vector_copy_heap_not_registered() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("vector_copy.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Verify that the agent supports the full profile + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + if (HSA_PROFILE_FULL != profile) { + continue; + } + + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + 
} + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 16, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the data block to be used by the kernel + // The size of the data block must be able to fit into one workgroup + uint32_t block_size = 1024; + uint32_t* data_in = (uint32_t*)malloc(sizeof(uint32_t) * block_size); + uint32_t* data_out = (uint32_t*)malloc(sizeof(uint32_t) * block_size); + + // Initialize the data + memset(data_out, 0, block_size * sizeof(uint32_t)); + int kk; + for (kk = 0; kk < block_size; ++kk) { + data_in[kk] = kk; + } + + // Allocate the kernel argument buffer from the correct region + kernarg_vector_copy_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), 
(void*) &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the kernarg + kernarg_buffer->in = data_in; + kernarg_buffer->out = data_out; + + // Execute the kernel several times + int jj; + int repeat_count = 8; + for (jj = 0; jj < repeat_count; ++jj) { + // Clear the output block + memset(data_out, 0, sizeof(uint32_t) * block_size); + + // Launch the vector_copy kernel + launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer); + + // Verify the output data block is updated + for (kk = 0; kk < block_size; ++kk) { + ASSERT(data_in[kk] == data_out[kk]); + } + } + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + free(data_in); + free(data_out); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_vector_copy_heap_registered.c b/src/core/memory/test_memory_vector_copy_heap_registered.c new file mode 100644 index 0000000..e421e6d --- /dev/null +++ b/src/core/memory/test_memory_vector_copy_heap_registered.c @@ -0,0 +1,220 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_vector_copy_heap_registered + * Purpose: Test that registered heap memory can be accessed by an agent if + * it supports the full profile, and that a kernel can copy data from/to another place. + * + * Test Description: + * 1. Allocate two memory blocks of the same size using system allocation APIs like malloc. + * 2. 
Initialize the first block with non zero values and the second block with zero values. + * 3. Launch a kernel to copy data from the first block to the second. + * 4. Repeat step 3 for several times. + * + * Expected Results: No error status is returned during the process and the values are copied + * correctly. + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_vector_copy_heap_registered() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("vector_copy.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Verify that the agent supports the full profile + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + if (HSA_PROFILE_FULL != profile) { + continue; + } + + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 16, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + 
hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the data block to be used by the kernel + // The size of the data block must be able to fit into one workgroup + uint32_t block_size = 1024; + uint32_t* data_in = (uint32_t*)malloc(sizeof(uint32_t) * block_size); + uint32_t* data_out = (uint32_t*)malloc(sizeof(uint32_t) * block_size); + + // Register the memory + status = hsa_memory_register(data_in, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_register(data_out, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the data + memset(data_out, 0, block_size * sizeof(uint32_t)); + int kk; + for (kk = 0; kk < block_size; ++kk) { + data_in[kk] = kk; + } + + // Allocate the kernel argument buffer from the correct region + kernarg_vector_copy_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), (void*) &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the kernarg + kernarg_buffer->in = data_in; + kernarg_buffer->out = data_out; + + // Execute the kernel several times + int jj; + int repeat_count = 8; + for (jj = 0; jj < repeat_count; ++jj) { + // Clear the output block + memset(data_out, 0, sizeof(uint32_t) * block_size); + + // Launch the vector_copy kernel + 
launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer); + + // Verify the output data block is updated + for (kk = 0; kk < block_size; ++kk) { + ASSERT(data_in[kk] == data_out[kk]); + } + } + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Deregister the memory + status = hsa_memory_deregister(data_in, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_deregister(data_out, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + free(data_in); + free(data_out); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_vector_copy_stack_not_registered.c b/src/core/memory/test_memory_vector_copy_stack_not_registered.c new file mode 100644 index 0000000..3f60d71 --- /dev/null +++ b/src/core/memory/test_memory_vector_copy_stack_not_registered.c @@ -0,0 +1,205 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_vector_copy_stack_not_registered + * Purpose: Test that non registered stack memory can be accessed by an agent if + * it supports the full profile, and that a kernel can copy data from/to another place. + * + * Test Description: + * 1. Declare two stack arrays of the same size. + * 2. Initialize the first array with non zero values and the second with zero values. + * 3.
Launch a kernel to copy data from the first array to the second. + * 4. Repeat step 3 for several times. + * + * Expected Results: No error status is returned during the process and the values are copied + * correctly. + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_vector_copy_stack_not_registered() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("vector_copy.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Verify that the agent supports the full profile + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + if (HSA_PROFILE_FULL != profile) { + continue; + } + + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 16, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = 
finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Declare the arrays + // The size of the data block must be able to fit into one workgroup + uint32_t block_size = 1024; + uint32_t data_in[block_size]; + uint32_t data_out[block_size]; + + // Initialize the data + memset(data_out, 0, block_size * sizeof(uint32_t)); + int kk; + for (kk = 0; kk < block_size; ++kk) { + data_in[kk] = kk; + } + + // Allocate the kernel argument buffer from the correct region + kernarg_vector_copy_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), (void*) &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the kernarg + kernarg_buffer->in = data_in; + kernarg_buffer->out = data_out; + + // Execute the kernel several times + int jj; + int repeat_count = 8; + for (jj = 0; jj < repeat_count; ++jj) { + // Clear the output block + memset(data_out, 0, sizeof(uint32_t) * block_size); + + // Launch the vector_copy kernel + launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer); + + // Verify the output data block is updated + for (kk = 0; kk < block_size; ++kk) { + ASSERT(data_in[kk] == data_out[kk]); + } + } + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/memory/test_memory_vector_copy_stack_registered.c b/src/core/memory/test_memory_vector_copy_stack_registered.c new file mode 100644 index 0000000..fe814f3 --- /dev/null +++ b/src/core/memory/test_memory_vector_copy_stack_registered.c @@ -0,0 +1,216 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: memory_vector_copy_stack_registered + * Purpose: Test that registered stack memory can be accessed by an agent if + * it supports the full profile, and that a kernel can copy data from/to another place. + * + * Test Description: + * 1. Two stack arrays of the same size. + * 2. Initialize the first array with non zero values and the second array with zero values. + * 3. Launch a kernel to copy data from the first array to the second. + * 4. Repeat step 3 for several times. + * + * Expected Results: No error status is returned during the process and the values are copied + * correctly. 
+ * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_memory_vector_copy_stack_registered() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("vector_copy.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Verify that the agent supports the full profile + hsa_profile_t profile; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_PROFILE, &profile); + if (HSA_PROFILE_FULL != profile) { + continue; + } + + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 16, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + 
ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__vector_copy_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Declare the arrays + uint32_t block_size = 1024; + uint32_t data_in[block_size]; + uint32_t data_out[block_size]; + + // Register the memory + status = hsa_memory_register(data_in, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_register(data_out, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the data + memset(data_out, 0, block_size * sizeof(uint32_t)); + int kk; + for (kk = 0; kk < block_size; ++kk) { + data_in[kk] = kk; + } + + // Allocate the kernel argument buffer from the correct region + kernarg_vector_copy_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(kernarg_vector_copy_t), (void*) &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the kernarg + kernarg_buffer->in = data_in; + kernarg_buffer->out = data_out; + + // Execute the kernel several times + int jj; + int repeat_count = 8; + for (jj = 0; jj < repeat_count; ++jj) { + // Clear the output block + memset(data_out, 0, sizeof(uint32_t) * block_size); + + // Launch the vector_copy kernel + launch_vector_copy_kernel(queue, block_size, (uint64_t)symbol_record.kernel_object, (void*)kernarg_buffer); + + // Verify the output data block is updated + for (kk = 0; kk < block_size; ++kk) { + ASSERT(data_in[kk] == data_out[kk]); + } + } + + // Free the kernarg memory buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Deregister the memory + status = hsa_memory_deregister(data_in, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == 
status); + status = hsa_memory_deregister(data_out, block_size * sizeof(uint32_t)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/hsa_queue.c b/src/core/queue/hsa_queue.c new file mode 100644 index 0000000..00f7138 --- /dev/null +++ b/src/core/queue/hsa_queue.c @@ -0,0 +1,92 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include "hsa_queue.h" + +DEFINE_TEST(queue_create_concurrent) +DEFINE_TEST(queue_create_parameters) +DEFINE_TEST(queue_callback) +DEFINE_TEST(queue_destroy_concurrent) +DEFINE_TEST(queue_dispatch_concurrent) +DEFINE_TEST(queue_full) +DEFINE_TEST(queue_multiple_dispatch) +DEFINE_TEST(queue_inactivate) +DEFINE_TEST(queue_size_create) +DEFINE_TEST(queue_multiple_queues) +DEFINE_TEST(queue_multi_gap) +DEFINE_TEST(queue_write_index_add_acq_rel_ordering) +DEFINE_TEST(queue_write_index_add_acquire_release_ordering) +DEFINE_TEST(queue_write_index_add_atomic) +DEFINE_TEST(queue_write_index_cas_acq_rel_ordering) +DEFINE_TEST(queue_write_index_cas_acquire_release_ordering) +DEFINE_TEST(queue_write_index_cas_atomic) +DEFINE_TEST(queue_write_index_load_store_atomic) + +int main(int argc, char* argv[]) +{ + INITIALIZE_TESTSUITE(); + ADD_TEST(queue_create_parameters); + ADD_TEST(queue_callback); + ADD_TEST(queue_create_concurrent) + ADD_TEST(queue_destroy_concurrent); + ADD_TEST(queue_dispatch_concurrent) + ADD_TEST(queue_full) + ADD_TEST(queue_multiple_dispatch); + 
ADD_TEST(queue_inactivate); + ADD_TEST(queue_size_create); + ADD_TEST(queue_multiple_queues) + ADD_TEST(queue_multi_gap); + ADD_TEST(queue_write_index_add_acq_rel_ordering); + ADD_TEST(queue_write_index_add_acquire_release_ordering); + ADD_TEST(queue_write_index_add_atomic); + ADD_TEST(queue_write_index_cas_acq_rel_ordering); + ADD_TEST(queue_write_index_cas_acquire_release_ordering) + ADD_TEST(queue_write_index_cas_atomic); + // This test may not be correct. Doesn't monotonically + // increment the write index. + // ADD_TEST(queue_write_index_load_store_atomic); + RUN_TESTS(); +} diff --git a/src/core/queue/hsa_queue.h b/src/core/queue/hsa_queue.h new file mode 100644 index 0000000..47700af --- /dev/null +++ b/src/core/queue/hsa_queue.h @@ -0,0 +1,67 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef _HSA_QUEUE_H_
+#define _HSA_QUEUE_H_
+
+// Entry points of the queue tests, listed alphabetically. Each takes no
+// arguments and returns int.
+
+// NOTE(review): hsa_queue.c has no DEFINE_TEST/ADD_TEST entry for this name;
+// it looks like an older spelling of test_queue_create_concurrent - confirm
+// before removing.
+extern int test_concurrent_queue_create();
+
+// Queue creation, dispatch and destruction tests
+extern int test_queue_callback();
+extern int test_queue_create_concurrent();
+extern int test_queue_create_parameters();
+extern int test_queue_destroy_concurrent();
+extern int test_queue_dispatch_concurrent();
+extern int test_queue_full();
+extern int test_queue_inactivate();
+extern int test_queue_multi_gap();
+extern int test_queue_multiple_dispatch();
+extern int test_queue_multiple_queues();
+extern int test_queue_size_create();
+
+// Write index tests
+extern int test_queue_write_index_add_acq_rel_ordering();
+extern int test_queue_write_index_add_acquire_release_ordering();
+extern int test_queue_write_index_add_atomic();
+extern int test_queue_write_index_cas_acq_rel_ordering();
+extern int test_queue_write_index_cas_acquire_release_ordering();
+extern int test_queue_write_index_cas_atomic();
+extern int test_queue_write_index_load_store_atomic();
+
+#endif // _HSA_QUEUE_H_
diff --git a/src/core/queue/test_queue_callback.c b/src/core/queue/test_queue_callback.c new file mode 100644 index 
0000000..4888d0c --- /dev/null +++ b/src/core/queue/test_queue_callback.c @@ -0,0 +1,193 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_callback + * Scope: Conformance + * + * Purpose: Verifies that several queues associated with + * a single agent can have a callback associated with them, + * and that asynchronous errors detected in queue the will properly + * trigger the callback. + * + * Test Description: + * 1) Construct a list of all agents that support the + * HSA_AGENT_FEATURE_KERNEL_DISPATCH queue feature. + * 2) For each agent, create several queues with the same callback + * function. + * 3) For each queue associated with the agent: + * a) Write an invalid packet into the queue, + * using an invalid value for the packet type. + * b) Ring the doorbell for that queue. + * c) Verify that the callback executed, and that + * the status reported is HSA_STATUS_ERROR_INVALID_PACKET_FORMAT + * and that the queue id is valid. + * 4) Delete the queues associated with the agent. + * 5) Repeat 2 to 4 several times. + * 6) Repeat this test for each agent. + * + * Expected Results: The callback function should be correctly executed + * and the status and queue id pass to the callback should be correct. 
+ */ + +#include +#include +#include +#include + +#define QUEUE_SIZE 1024 + +hsa_status_t global_status; +hsa_queue_t* global_queue_handle; +hsa_signal_t global_signal; + +void callback(hsa_status_t status, hsa_queue_t* queue_handle, void* data) { + global_status = status; + global_queue_handle = queue_handle; + hsa_signal_store_relaxed(global_signal, 1); + return; +} + +int test_queue_callback() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + struct agent_list_s agent_list; + + // Get the agent list + get_agent_list(&agent_list); + + int jj; + + for (jj = 0; jj < agent_list.num_agents; ++jj) { + hsa_queue_feature_t feature; + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Only test on agents the support the queue dispatch feature. + if (!(feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Initialize the global signal + status = hsa_signal_create(0, 0, NULL, &global_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Query the agent properties + uint32_t queue_max_size; + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + queue_max_size = (queue_max_size < QUEUE_SIZE) ? queue_max_size : QUEUE_SIZE; + + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create queue_max queues for the agent. 
+ int ii; + hsa_queue_t* queue[queue_max]; + for (ii = 0; ii < queue_max; ++ii) { + status = hsa_queue_create(agent_list.agents[jj], queue_max_size, HSA_QUEUE_TYPE_SINGLE, callback, NULL, UINT32_MAX, UINT32_MAX, &queue[ii]); + if (HSA_STATUS_SUCCESS != status) { + ASSERT(HSA_STATUS_ERROR_OUT_OF_RESOURCES == status); + queue_max = ii; + } + } + + // For each queue, write a packet with an invalid code object + const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, packet_size); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.workgroup_size_x = 256; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 256; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header|= 1 << HSA_PACKET_HEADER_BARRIER; + + hsa_kernel_dispatch_packet_t* queue_packet; + for (ii = 0; ii < queue_max; ++ii) { + // Initialize the global variables + global_status = HSA_STATUS_SUCCESS; + global_queue_handle = 0; + hsa_signal_store_relaxed(global_signal, 0); + + // Dispatch the packet. 
+ enqueue_dispatch_packet(queue[ii], &dispatch_packet); + + // Wait on the global signal value to change to 1 + while (1 != hsa_signal_wait_relaxed(global_signal, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {} + + // Verify the global_status and global_queue_handle values were set correctly + ASSERT(global_queue_handle == queue[ii]); + ASSERT(HSA_STATUS_ERROR_INVALID_CODE_OBJECT == global_status); + } + + // Destroy the queues + for (ii = 0; ii < queue_max; ++ii) { + status = hsa_queue_destroy(queue[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the global signal + status = hsa_signal_destroy(global_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free_agent_list(&agent_list); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/queue/test_queue_create_concurrent.c b/src/core/queue/test_queue_create_concurrent.c new file mode 100644 index 0000000..dfcbe6d --- /dev/null +++ b/src/core/queue/test_queue_create_concurrent.c @@ -0,0 +1,299 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_create_concurrent + * Scope: Conformance + * + * Purpose: Verifies that queues can be created concurrently + * using a single agent as a parameter. All agents should + * be checked. + * + * Test Description: + * 1) For each agent in the system, + * a) Query the agent to determine its HSA_AGENT_INFO_QUEUES_MAX + * parameter. 
+ * b) Create n threads, each of which attempts to create m queues + * such that m * n > HSA_AGENT_INFO_QUEUES_MAX. + * c) The threads should gracefully exit, and not indicate a test + * failure if HSA_STATUS_ERROR_OUT_OF_RESOURCES is returned. + * d) Each thread should increment a global, atomic value that + * indicates the total number of queues created. + * 2) After all threads have returned, count the number of created queues. + * This number should be equal to the maximum number of queues supported + * by the agent. + * 3) Check that each queue has a unique queue_id. + * 4) Check that the read pointer and writer pointer are + * initialized to 0. + * 5) Create a simple kernel and verify that it will run on each queue. + * 6) Destroy all queues in the main thread with hsa_queue_destroy. + * 7) Repeat this for the same agent several times. + * + * Expected Results: All queues should be successfully created and all properties + * should be initialized correctly. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define N_THREADS 8 + +static int m_queues; +static int num_queues; + +static pthread_mutex_t queue_idx_mutex = PTHREAD_MUTEX_INITIALIZER; + +hsa_queue_t **queues; + +// Function to compare agent ids +int compare_agent_ids(const void *a, const void *b) { + return *((uint32_t*) a) - *((uint32_t*) b); +} + +// Work function for creating queues +void test_create_queue(void *data) { + hsa_status_t status; + hsa_agent_t* agent = (hsa_agent_t *)data; + hsa_queue_t* queue; + + // Create m_queues queue + int ii; + for (ii = 0; ii < m_queues; ++ii) { + status = hsa_queue_create(*agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + + // Check status, if status is HSA_STATUS_ERROR_OUT_OF_RESOURCES, it + // indicates the total number of queues reaches the upper bound + ASSERT(status == HSA_STATUS_SUCCESS || status == HSA_STATUS_ERROR_OUT_OF_RESOURCES); + + if (HSA_STATUS_SUCCESS == status) { + 
pthread_mutex_lock(&queue_idx_mutex); + queues[num_queues] = queue; + ++num_queues; + pthread_mutex_unlock(&queue_idx_mutex); + } + } +} + +int test_queue_create_concurrent() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + // Get agent list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int jj; + // For each agent + for (jj = 0; jj < agent_list.num_agents; ++jj) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + // Continue if this agent does not support DISPATCH + continue; + } + + uint32_t queue_max; + + m_queues = 4; + num_queues = 0; + + // Get max number of queues + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Set the total number of queues larger than the max number of queues + while ((N_THREADS * m_queues) <= queue_max) { + m_queues *= 2; + } + + queues = (hsa_queue_t **) malloc(sizeof(hsa_queue_t *) * N_THREADS * m_queues); + + struct test_group *tg_concurrent_create_queue = test_group_create(N_THREADS); + + test_group_add(tg_concurrent_create_queue, &test_create_queue, agent_list.agents + jj, N_THREADS); + test_group_thread_create(tg_concurrent_create_queue); + test_group_start(tg_concurrent_create_queue); + test_group_wait(tg_concurrent_create_queue); + test_group_exit(tg_concurrent_create_queue); + test_group_destroy(tg_concurrent_create_queue); + + // Create an array to store id for every queue + uint32_t ids[num_queues]; + + int ii; + for (ii = 0; ii < num_queues; ++ii) { + ids[ii] = queues[ii]->id; + + // check if the read_pointer and write_pointer are initialized to zero + 
uint64_t read_pointer, write_pointer; + read_pointer = hsa_queue_load_read_index_acquire(queues[ii]); + ASSERT(read_pointer == 0); + + write_pointer = hsa_queue_load_write_index_acquire(queues[ii]); + ASSERT(write_pointer == 0); + } + + // Sort ids + qsort(ids, num_queues, sizeof(uint32_t), compare_agent_ids); + + // Check if two ids are same + for (ii = 1; ii < num_queues; ++ii) { + ASSERT_MSG(ids[ii - 1] != ids[ii], "the ids of queues are not unique\n"); + } + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[jj], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__no_op_kernel"; + status = get_executable_symbols(executable, agent_list.agents[jj], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a completion signal. 
+ hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Fill in info for the default dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 256; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 256; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.kernarg_address = 0; + dispatch_packet.completion_signal = signal; + + for (ii = 1; ii < num_queues; ++ii) { + // Reset the signal value + hsa_signal_store_relaxed(signal, 1); + + // launch the kernel + enqueue_dispatch_packet(queues[ii], &dispatch_packet); + + // Wait for the kernel to finish + hsa_signal_value_t value = hsa_signal_wait_relaxed(signal, + HSA_SIGNAL_CONDITION_EQ, + 0, + UINT64_MAX, + HSA_WAIT_STATE_BLOCKED); + ASSERT(value == 0); + } + + // Destroy the signal + status = hsa_signal_destroy(signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy queues + for (ii = 0; ii < 
num_queues; ++ii) { + hsa_queue_destroy(queues[ii]); + } + + free(queues); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_create_parameters.c b/src/core/queue/test_queue_create_parameters.c new file mode 100644 index 0000000..ba7ad2d --- /dev/null +++ b/src/core/queue/test_queue_create_parameters.c @@ -0,0 +1,212 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: queue_create_parameters
+ * Scope: Conformance
+ *
+ * Purpose: Verifies that each agent on the platform can
+ * create a queue that satisfies all specified queue parameters.
+ *
+ * Test Description:
+ * 1) For each agent in the system:
+ *    a) Query the HSA_AGENT_INFO_FEATURE attribute.
+ *    b) Query the HSA_AGENT_INFO_QUEUE_MAX_SIZE attribute.
+ *    c) Query the HSA_AGENT_INFO_QUEUE_TYPE attribute.
+ * 2) For each agent create a queue using the maximal set of parameters,
+ *    i.e. if a queue supports a given maximum size, HSA_QUEUE_TYPE_MULTI
+ *    and HSA_AGENT_FEATURE_KERNEL_DISPATCH, create a queue with those
+ *    attributes.
+ * 3) Attempt to dispatch a simple kernel to the queue, if the
+ *    queue supports HSA_AGENT_FEATURE_KERNEL_DISPATCH.
+ * 4) Destroy each queue.
+ */ + +#include +#include +#include +#include +#include +#include + +// The NUM_KERNELS must be a power of 2 +#define NUM_KERNELS 16 + +int test_queue_create_parameters() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + // Get the agent list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + uint32_t queue_max_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max_size); + ASSERT(status == HSA_STATUS_SUCCESS); + + hsa_queue_type_t queue_type; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_TYPE, &queue_type); + ASSERT(status == HSA_STATUS_SUCCESS); + + hsa_queue_feature_t feature; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(status == HSA_STATUS_SUCCESS); + + if (feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { + // Create the queue + ASSERT(HSA_QUEUE_TYPE_SINGLE == queue_type || HSA_QUEUE_TYPE_MULTI == queue_type); + hsa_queue_t *queue; + status = hsa_queue_create(agent_list.agents[ii], queue_max_size, queue_type, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, 
sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__no_op_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Signal array + hsa_signal_t signals[NUM_KERNELS]; + + int jj; + for (jj = 0; jj < NUM_KERNELS; ++jj) { + status = hsa_signal_create(1, 0, NULL, &signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Fill info for the default dispatch_packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 256; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 256; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.kernarg_address = 0; + + // Enqueue dispatch packets + hsa_kernel_dispatch_packet_t* queue_packet; + for (jj = 0; jj < NUM_KERNELS; ++jj) { + dispatch_packet.completion_signal = signals[jj]; + enqueue_dispatch_packet(queue, &dispatch_packet); + } + + // Wait until all dispatch packets finish executing + for (jj = 0; jj < NUM_KERNELS; ++jj) { + hsa_signal_value_t value = hsa_signal_wait_relaxed(signals[jj], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + ASSERT(0 == value); + } + + // Destroy 
signals + for (jj = 0; jj < NUM_KERNELS; ++jj) { + status = hsa_signal_destroy(signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_destroy_concurrent.c b/src/core/queue/test_queue_destroy_concurrent.c new file mode 100644 index 0000000..0b44f02 --- /dev/null +++ b/src/core/queue/test_queue_destroy_concurrent.c @@ -0,0 +1,167 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_destroy_concurrent + * Scope: Conformance + * + * Purpose: Verifies that queues can be destroyed concurrently. + * + * Test Description: + * 1) For all agents in the system, + * a) Query the agent to determine its HSA_AGENT_INFO_QUEUES_MAX + * parameter. + * b) Create HSA_AGENT_INFO_QUEUES_MAX queues for that agent. + * c) The queues should be stored in a global array, in no particular + * order. 
+ * 2) Create n threads, each assigned a range in the array. Each thread will + * a) Destroy m queues, such that n * m = Total # of queues. + * 7) Repeat this several times. + * + * Expected Results: All queues should be successfully created and destroyed. + * + */ + +#include +#include +#include +#include +#include + +#define N_THREADS 16 + +static int m_queues; + +hsa_queue_t **queues; + +typedef struct queue_destroy_params { + hsa_queue_t** queues; + int count; +} queue_destroy_params_t; + +// work function for destroying queues +void test_destroy_queue(void *data) { + int ii; + hsa_status_t status; + + // Get offset of queues + queue_destroy_params_t* params = (queue_destroy_params_t*)data; + + for (ii = 0; ii < params->count; ++ii) { + status = hsa_queue_destroy(params->queues[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } +} + +int test_queue_destroy_concurrent() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + uint32_t queue_max = 0; + int jj; + + // Get max number of queues that is supported by the agent + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate queue pointers for current agent. + queues = (hsa_queue_t **) malloc(sizeof(hsa_queue_t *) * queue_max); + memset(queues, 0, sizeof(hsa_queue_t*) * queue_max); + + // Create queues on current agent. + for (jj = 0; jj < queue_max; ++jj) { + status = hsa_queue_create(agent_list.agents[ii], 4, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queues[jj]); + if (HSA_STATUS_SUCCESS != status) { + ASSERT(HSA_STATUS_ERROR_OUT_OF_RESOURCES == status); + queue_max = jj; + } + } + + // Calculate the number of queues assigned to each thread to be destroyed. 
+ // Each thread may have different num of queues, depending on (0 == queue_max % N_THREADS). + queue_destroy_params_t params[N_THREADS]; + + int k; + for (k = 0; k < N_THREADS; ++k) { + params[k].count = queue_max / N_THREADS; + if ((queue_max % N_THREADS != 0) && k + 1 <= (queue_max % N_THREADS - 1)) { + params[k].count += 1; + } + } + int total_count = 0; + for (k = 0; k < N_THREADS; ++k) { + params[k].queues = &(queues[total_count]); + total_count += params[k].count; + } + + struct test_group *tg_concurrent_create_queue = test_group_create(N_THREADS); + + for (k = 0; k < N_THREADS; ++k) { + test_group_add(tg_concurrent_create_queue, &test_destroy_queue, ¶ms[k], 1); + } + + test_group_thread_create(tg_concurrent_create_queue); + test_group_start(tg_concurrent_create_queue); + test_group_wait(tg_concurrent_create_queue); + test_group_exit(tg_concurrent_create_queue); + test_group_destroy(tg_concurrent_create_queue); + + free(queues); + } + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_dispatch_concurrent.c b/src/core/queue/test_queue_dispatch_concurrent.c new file mode 100644 index 0000000..c889992 --- /dev/null +++ b/src/core/queue/test_queue_dispatch_concurrent.c @@ -0,0 +1,334 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_dispatch_concurrent + * Scope: Conformance + * + * Purpose: Verifies that a queue can have AQL packets written + * to it concurrently, and that the AQL packets are properly + * executed. + * + * Test Description: + * 1) Query the list of agents for all agents that support the + * HSA_QUEUE_FEATURE_DISPATCH queue feature. 
+ * 2) For each agent create a queue associated with the agent with a specified
+ *    size no larger than the HSA_AGENT_INFO_QUEUE_MAX_SIZE attribute.
+ * 3) Create several different threads that concurrently operate on the queue,
+ *    performing the following operations:
+ *    a) Create a signal for use in dispatches.
+ *    b) Allocate a small memory location for use with the init_data
+ *       kernel.
+ *    c) Load and initialize the init_data kernel.
+ *    d) Call hsa_queue_add_write_index_acquire to obtain a valid write
+ *       index in the queue.
+ *    e) Call hsa_queue_load_read_index_relaxed in a loop until the write
+ *       index is less than the sum of the read index and the queue size.
+ *    f) Populate the packet at the write index with a dispatch packet
+ *       that launches the init_data kernel. The packet and the kernel parameters
+ *       should be configured such that a small, one dimensional memory location
+ *       is initialized with a unique value associated with the thread.
+ *    g) The thread should wait on the signal for the dispatch to finish.
+ *    h) The thread should verify that the memory location was properly initialized.
+ *    i) Repeat steps d through h several times, terminating only when the write
+ *       index is equal to a set multiple of the specified queue size.
+ * 4) Repeat this test for each agent/queue.
+ *
+ * Expected Results: All dispatches should succeed and the data should be initialized
+ * correctly.
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define ARGUMENT_ALIGN_BYTES 16 +#define CON_QUEUE_DISP_NUM_THREADS 32 +#define CON_QUEUE_DISP_DEFAULT_QUEUE_SIZE 256 +#define CON_QUEUE_DISP_TERMINATION_MULTIPLES 2 + +typedef struct queue_dispatch_params { + hsa_agent_t* agent; + hsa_queue_t* queue; + symbol_record_t* symbol_record; +} queue_dispatch_params_t; + +// Work function for concurrent queue dispatch +void thread_proc_dispatch(void* data) { + hsa_status_t status; + + queue_dispatch_params_t* param = (queue_dispatch_params_t*) data; + int num_dispatch_packets = param->queue->size * CON_QUEUE_DISP_TERMINATION_MULTIPLES; + + // Allocate a memory block used by the kernel. + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(*(param->agent), get_global_memory_region_fine_grained, &global_region); + ASSERT((uint64_t)-1 != global_region.handle); + + const int block_size = 1024; + uint32_t* data_block; + status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_block); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Declare the kernarg data structure + struct __attribute__ ((aligned(ARGUMENT_ALIGN_BYTES))) con_queue_dispatch_arg_t { + void* data; + uint32_t value; + uint32_t row_pitch; + uint32_t slice_pitch; + } args; + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(*(param->agent), get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Allocate the kernel argument buffer from the correct region + void* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, sizeof(args), &kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a signal for dispatch + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Fill in 
info for the default dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.setup |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 256; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 256; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.group_segment_size = param->symbol_record->group_segment_size; + dispatch_packet.private_segment_size = param->symbol_record->private_segment_size; + dispatch_packet.kernel_object = param->symbol_record->kernel_object; + dispatch_packet.kernarg_address = 0; + dispatch_packet.completion_signal = signal; + + int ii; + for (ii = 0; ii < num_dispatch_packets; ++ii) { + // Reinitialize the signal's value + hsa_signal_store_relaxed(signal, 1); + + // Setup the kernarg arguments + args.data = data_block; + args.value = (uint32_t)ii; + args.row_pitch = (uint32_t)0; + args.slice_pitch = (uint32_t)0; + memcpy(kernarg_buffer, &args, sizeof(args)); + + // Initialize the packet with specific parameters. 
+ dispatch_packet.kernarg_address = (void*) kernarg_buffer; + + // Dispatch the kernel + enqueue_dispatch_packet(param->queue, &dispatch_packet); + + // Wait until the kernel complete + hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Verify the kernel executed correctly + int valid = 1; + int failIndex = 0; + int jj; + for (jj = 0; jj < block_size; ++jj) { + if (data_block[jj] != (uint32_t)ii) { + failIndex = jj; + valid = 0; + } + } + } + + status = hsa_signal_destroy(signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(data_block); + ASSERT(HSA_STATUS_SUCCESS == status); + + return; +} + +int test_queue_dispatch_concurrent() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("init_data.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if a queue on this agent support QUEUE_TYPE_MULTI + hsa_queue_type_t queue_type; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_TYPE, &queue_type); + ASSERT(HSA_STATUS_SUCCESS == status); + if (HSA_QUEUE_TYPE_MULTI != queue_type) { + continue; + } + + // Get the maximum number of queues that is supported on this agent + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + if 
(queue_max < 1) { + // This agent does not support any queue, skip it + continue; + } + + // Adjust the queue size + uint32_t queue_size = CON_QUEUE_DISP_DEFAULT_QUEUE_SIZE; + uint32_t queue_max_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + while (queue_size > queue_max_size) { + queue_size /= 2; + } + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__init_int_data_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // The thread parameters + queue_dispatch_params_t params; + params.agent = agent_list.agents + ii; + params.queue = queue; + params.symbol_record = &symbol_record; + + // Create the test group + struct test_group* tg_concurrent_queue_dispatch = test_group_create(CON_QUEUE_DISP_NUM_THREADS); + test_group_add(tg_concurrent_queue_dispatch, &thread_proc_dispatch, ¶ms, CON_QUEUE_DISP_NUM_THREADS); + test_group_thread_create(tg_concurrent_queue_dispatch); + test_group_start(tg_concurrent_queue_dispatch); + 
test_group_wait(tg_concurrent_queue_dispatch); + test_group_exit(tg_concurrent_queue_dispatch); + test_group_destroy(tg_concurrent_queue_dispatch); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_full.c b/src/core/queue/test_queue_full.c new file mode 100644 index 0000000..05091f5 --- /dev/null +++ b/src/core/queue/test_queue_full.c @@ -0,0 +1,314 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_full + * Scope: Conformance + * + * Purpose: Verifies a queue can be completely filled with packets + * before execution starts and that all submissions still execute correctly. + * + * Test Description: + * 1) Obtain the list of all agents that support the HSA_AGENT_FEATURE_KERNEL_DISPATCH + * queue feature. + * 2) For each agent create a queue with a specified size equal to the + * HSA_AGENT_INFO_QUEUE_MAX_SIZE agent attribute. + * 3) Create one signal. + * 4) Load and initialize the init_data kernel. + * 5) Allocate small memory locations that can be used by the execution of the + * init_data kernel. There should be enough for each packet slot. + * 6) Populate (HSA_AGENT_INFO_QUEUE_MAX_SIZE - 1) queue packet slots with a + * dispatch packet that will dispatch the init_data kernel, but set the packet type + * to HSA_PACKET_TYPE_ALWAYS_RESERVED. Set the signal value for each one to 0, + * indicating no signal will be used. 
+ * 7) Populate the last slot with a dispatch packet that will dispatch the init_data + * kernel and set the packet type to HSA_PACKET_TYPE_DISPATCH. Set the signal value + * to the single valid signal created. + * 8) Step backward through the queue's packets, setting the type to HSA_PACKET_TYPE_DISPATCH. + * 9) Ring the queue's doorbell. + * 10) Wait for the signal to indicate that the last packet has finished executing. + * 11) Check that the read and write index are equal. + * 12) Validate that all of the memory locations have been successfully updated. + * 14) Repeat 3 through 13 for each agent. + * + * Expected Results: All queues should be successfully created, all dispatches + * should finish and all of the data should be initialized correctly. + */ + +#include +#include +#include +#include + +#define ARGUMENT_ALIGN_BYTES 16 + +#define ARRAY_SIZE 128 + +int test_queue_full() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("init_data.brig", &module)); + + // Get the agent list + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii, jj, kk; + + for (jj = 0; jj < agent_list.num_agents; ++jj) { + // Only test on agents the support the queue dispatch feature. + hsa_queue_feature_t feature; + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (!(feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // At least one queue should be supported for the agent. 
+ uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(0 < queue_max); + + // Query the agent properties + uint32_t queue_max_size; + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find a global region + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[jj], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 != global_region.handle) { + // Skip this agent if global fine grained memory isn't available + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[jj], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[jj], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__init_int_data_kernel"; + status = get_executable_symbols(executable, agent_list.agents[jj], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the queue for the agent. 
+ hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[jj], queue_max_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a single signal for the last dispatch and initialize it to 1 + hsa_signal_t signal; + hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the prototype packet data. + hsa_kernel_dispatch_packet_t dispatch_packet; + size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + memset(&dispatch_packet, 0, packet_size); + dispatch_packet.header |= HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = ARRAY_SIZE; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = ARRAY_SIZE; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.kernarg_address = 0; + dispatch_packet.completion_signal.handle = 0; + + // This array contains pointers to allocated argument buffers, one per dispatch + void* kernel_arg_buffer[queue->size]; + uint32_t* data_buffer[queue->size]; + + // Argument prototype + struct __attribute__((aligned(ARGUMENT_ALIGN_BYTES))) args_t { + uint64_t data; + uint32_t value; + uint32_t row_pitch; + uint32_t slice_pitch; + } args; + + // Kernel argument allocation and buffer allocation + for (ii = 0; ii < queue->size; ++ii) { + // Allocate the kernel argument 
structure for the dispatch + status = hsa_memory_allocate(kernarg_region, + symbol_record.kernarg_segment_size, + &kernel_arg_buffer[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the data buffer + status = hsa_memory_allocate(global_region, + ARRAY_SIZE*sizeof(uint32_t), + (void**) &data_buffer[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Block until the queue is empty + uint64_t delta; + uint64_t write_index = hsa_queue_load_write_index_relaxed(queue); + do { + delta = write_index - hsa_queue_load_read_index_relaxed(queue); + } while (delta > 0); + + // Enqueue all of the packets + const uint32_t queue_mask = queue->size - 1; + for (ii = 0; ii < queue->size; ++ii) { + // Set the kernel arguments and initialize the data + memset(data_buffer[ii], 0, ARRAY_SIZE*sizeof(uint32_t)); + args.data = (uint64_t) data_buffer[ii]; + args.value = (uint32_t) ii; + args.row_pitch = 0; + args.slice_pitch = 0; + memcpy(kernel_arg_buffer[ii], &args, sizeof(args)); + // Set the packet kernel argument data. + dispatch_packet.kernarg_address = (void*) kernel_arg_buffer[ii]; + // Increment the write index of the queue, reserving a slot + write_index = hsa_queue_add_write_index_relaxed(queue, 1); + + // Initialize the packets. 
+ ((hsa_kernel_dispatch_packet_t*)(queue->base_address))[write_index&queue_mask]=dispatch_packet; + } + + // For the last queue, set the completion signal + ((hsa_kernel_dispatch_packet_t*)(queue->base_address))[queue_mask].completion_signal = signal; + + // Step backward through the queue, setting the packet type to HSA_PACKET_TYPE_DISPATCH + for (ii = queue->size-1; ii >= 0; --ii) { + ((hsa_kernel_dispatch_packet_t*)(queue->base_address))[ii&queue_mask].header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + } + + // Ring the doorbell using the last write_index + hsa_signal_store_relaxed(queue->doorbell_signal, write_index); + + // Wait on the signal value to decrement + status = hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Validate the data + for (ii = 0; ii < queue->size; ++ii) { + uint32_t* ptr = data_buffer[ii]; + for (kk = 0; kk < ARRAY_SIZE; ++kk) { + ASSERT(*(ptr+kk) == ii); + } + } + + // Deallocate the data + for (ii = 0; ii < queue->size; ++ii) { + // Free the associated data arrays + status = hsa_memory_free(data_buffer[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + // Free the kernel argument structure + status = hsa_memory_free(kernel_arg_buffer[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the signal + status = hsa_signal_destroy(signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff 
--git a/src/core/queue/test_queue_inactivate.c b/src/core/queue/test_queue_inactivate.c new file mode 100644 index 0000000..393612f --- /dev/null +++ b/src/core/queue/test_queue_inactivate.c @@ -0,0 +1,314 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_inactivate + * Scope: Conformance + * + * Purpose: Verifies a queue will become inactive after the + * hsa_queue_inactivate is call using the queue as a parameter. + * + * Test Description: + * 1) Obtain the list of all agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) For each agent create a queue. + * 4) Load and initialize the init_data kernel. + * 5) Allocate small memory locations that can be used by the execution of the + * init_data kernel. + * 6) Dispatch several init_data kernel executions, and verify that they are executed + * correctly. + * 7) Call the hsa_queue_inactivate API on the queue. + * 8) Dispatch several init_data kernel executions, and verify that they are not + * executed. + * 14) Repeat 3 through 13 for each agent. + * + * Expected Results: Any dispatch after the queue is inactivated should be ignored. 
+ */ + +#include +#include +#include +#include +#include +#include + +#define ARGUMENT_ALIGN_BYTES 16 +#define QUEUE_INACTIVATE_DEFAULT_QUEUE_SIZE 64 +#define QUEUE_INACTIVATE_NUM_PACKETS 16 + +void launch_inactivated_test_kernels( + hsa_agent_t* agent, + hsa_queue_t* queue, + symbol_record_t* symbol_record, + int inactivated) { + hsa_status_t status; + int ii; + + // Find a memory region in the global segment + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(*agent, get_global_memory_region_fine_grained, &global_region); + ASSERT((uint64_t)-1 != global_region.handle); + + // Memory blocks used by the kernel arg + const int block_size = 1024; + uint32_t** data_blocks = (uint32_t**)malloc(QUEUE_INACTIVATE_NUM_PACKETS * sizeof(uint32_t*)); + for (ii = 0; ii < QUEUE_INACTIVATE_NUM_PACKETS; ++ii) { + status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_blocks[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + memset(data_blocks[ii], 0, block_size * sizeof(uint32_t)); + } + + // Kernarg data structure + typedef struct __attribute__ ((aligned(ARGUMENT_ALIGN_BYTES))) multi_queue_dispatch_arg { + void* data; + uint32_t value; + uint32_t row_pitch; + uint32_t slice_pitch; + } multi_queue_dispatch_arg_t; + multi_queue_dispatch_arg_t args; + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(*agent, get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Allocate the kernel argument buffer from the correct region + multi_queue_dispatch_arg_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, QUEUE_INACTIVATE_NUM_PACKETS * sizeof(args), (void**)(&kernarg_buffer)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the signal with initial value of "num_packets" + hsa_signal_t signal; + status = 
hsa_signal_create((hsa_signal_value_t) QUEUE_INACTIVATE_NUM_PACKETS, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.completion_signal = signal; + dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = block_size; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = block_size; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.group_segment_size = symbol_record->group_segment_size; + dispatch_packet.private_segment_size = symbol_record->private_segment_size; + dispatch_packet.kernel_object = symbol_record->kernel_object; + dispatch_packet.kernarg_address = 0; + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + + // Populate dispatch packets + for (ii = 0; ii < QUEUE_INACTIVATE_NUM_PACKETS; ++ii) { + // Setup the kernarg + args.data = data_blocks[ii]; + args.value = (uint32_t)(ii + 1); + args.row_pitch = (uint32_t)0; + args.slice_pitch = (uint32_t)0; + memcpy(kernarg_buffer + ii, &args, sizeof(args)); + dispatch_packet.kernarg_address = (void*)(kernarg_buffer + ii); + + // Enqueue the packet + enqueue_dispatch_packet(queue, &dispatch_packet); + } + + // Wait until all the kernels are complete + if (0 == inactivated) { + while (0 != hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {} + } else { + hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_EQ, 0, (uint64_t) 10, HSA_WAIT_STATE_BLOCKED); + } + + // Destroy the signal + hsa_signal_destroy(signal); + + // Verify all kernels 
are executed correctly + int valid = 1; + int failPacketIdx = 0; + int failDataIdx = 0; + for (ii = 0; ii < QUEUE_INACTIVATE_NUM_PACKETS; ++ii) { + int jj; + for (jj = 0; jj < block_size; ++jj) { + if (inactivated) { + // Expect the data_block[ii] is all 0s since none of + // Kernels have been executed + if (0 != data_blocks[ii][jj]) { + valid = 0; + failPacketIdx = ii; + failDataIdx = jj; + break; + } + } else { + // Expect the data_block[ii] hsa been updated by kernels + if (data_blocks[ii][jj] != (ii + 1)) { + valid = 0; + failPacketIdx = ii; + failDataIdx = jj; + break; + } + } + } + if (0 == valid) { + break; + } + } + + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the memory addresses used by kernel arg + for (ii = 0; ii < QUEUE_INACTIVATE_NUM_PACKETS; ++ii) { + status = hsa_memory_free(data_blocks[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(data_blocks); +} + +int test_queue_inactivate(void) { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("init_data.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get the maximum number of queues that is supported on this agent + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + if (queue_max < 1) { + continue; + } + + // Create the queue + hsa_queue_t* queue; + status = 
hsa_queue_create(agent_list.agents[ii], QUEUE_INACTIVATE_DEFAULT_QUEUE_SIZE, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__init_int_data_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Launch kernels before inactivation + launch_inactivated_test_kernels(agent_list.agents + ii, queue, &symbol_record, 0); + + // Inactivate the queue + status = hsa_queue_inactivate(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Launch kernels after inactivation + launch_inactivated_test_kernels(agent_list.agents + ii, queue, &symbol_record, 1); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git 
a/src/core/queue/test_queue_multi_gap.c b/src/core/queue/test_queue_multi_gap.c new file mode 100644 index 0000000..c8903d0 --- /dev/null +++ b/src/core/queue/test_queue_multi_gap.c @@ -0,0 +1,371 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_multi_gap + * Scope: Conformance + * + * Purpose: Verifies a queue can contain regions where packets have type + * HSA_PACKET_TYPE_ALWAYS_RESERVED between regions where there are valid + * packet types, i.e. HSA_PACKET_TYPE_DISPATCH, and that the packet processor + * will block until the packet type has changed. + * + * Test Description: + * 1) Obtain the list of all agents that support the HSA_QUEUE_FEATURE_DISPATCH + * queue feature. + * 2) For a valid agent create a queue of type HSA_QUEUE_TYPE_MULTI. + * 3) Load and initialize the init_data kernel. + * 4) Allocate small memory locations that can be used by the execution of the + * init_data kernel. There should be enough for each packet slot. + * 5) Create three signals. + * 6) Reserve three packet regions in the queue of equal size. + * 7) Populate the first and last packet regions with valid dispatch packets. + * 8) Use the first signal as the completion signal in the last packet of the first region. + * 9) Use the third signal as the completion signal in the last packet of the third region. + * 10) Ring the doorbell twice, once with the first region's last write index and once + * with the third regions last write index. + * 11) Wait on the first signal to trigger. + * 12) Validate the memory regions initialized by the execution of the packets + * in the first region. + * 12) Check the value of the third signal and ensure it has not triggered. + * 13) Populate the middle region with valid dispatch packets. + * 14) Use the second signal as the completion signal in the last packet of the middle region. + * 15) Ring the doorbell using the last write index of the middle region as the value. 
+ * 16) Wait on the second signal to trigger. + * 17) Wait on the third signal to trigger. + * 12) Validate that the memory locations associated with the packets in the second and + * third regions have been successfully updated. + * 13) Repeat 6 through 12 several times. + * 14) Repeat 2 through 13 for each agent. + * + * Expected Results: All dispatches should finish and all of the data should be initialized correctly. + */ + +#include +#include +#include +#include +#include +#include + +#define ARGUMENT_ALIGN_BYTES 16 +#define MULTI_GAP_QUEUE_SIZE 128 +#define MULTI_GAP_REGION_SIZE 32 +#define MULTI_GAP_TEST_REPEAT 4 +#define MULTI_GAP_KERNEL_DATA_SIZE 1024 + +// Check the memory block of each individual region +void verify_data(uint32_t** data_blocks, int initialized, int region_index) { + int ii; + int fail = 0; + for (ii = 0; ii < MULTI_GAP_REGION_SIZE; ++ii) { + uint32_t expected_value; + if (initialized) { + // The data block has been initialized by the kernels + expected_value = ii + 1; + } else { + // The data block has not been initialized + expected_value = 0; + } + int jj; + for (jj = 0; jj < MULTI_GAP_KERNEL_DATA_SIZE; ++jj) { + if (data_blocks[ii][jj] != expected_value) { + fail = 1; + break; + } + } + if (fail) { + break; + } + } +} + +int test_queue_multi_gap() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("init_data.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region 
in the global segment that supports fine grained memory + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 == global_region.handle) { + // Skip this agent if it doesn't support fine grained memory + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Get the maximum number of queues that is supported on this agent + uint32_t queues_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queues_max); + ASSERT(HSA_STATUS_SUCCESS == status); + if (queues_max < 1) { + continue; + } + + // Create the queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], MULTI_GAP_QUEUE_SIZE, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__init_int_data_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate 
memory blocks used by the kernel arg + int jj; + uint32_t** data_blocks = (uint32_t**) malloc(3 * MULTI_GAP_REGION_SIZE * sizeof(uint32_t*)); + for (jj = 0; jj < 3*MULTI_GAP_REGION_SIZE; ++jj) { + status = hsa_memory_allocate(global_region, MULTI_GAP_KERNEL_DATA_SIZE * sizeof(uint32_t), (void**) &data_blocks[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // The kernarg data structure + typedef struct __attribute__ ((aligned(ARGUMENT_ALIGN_BYTES))) queue_multi_gap_arg { + void* data; + uint32_t value; + uint32_t row_pitch; + uint32_t slice_pitch; + } queue_multi_gap_arg_t; + queue_multi_gap_arg_t args; + + // Allocate the kernel argument buffer from the correct region + queue_multi_gap_arg_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, 3 * MULTI_GAP_REGION_SIZE * sizeof(args), (void**)(&kernarg_buffer)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the kernel arg data structure + for (jj = 0; jj < 3*MULTI_GAP_REGION_SIZE; ++jj) { + args.data = data_blocks[jj]; + args.value = jj % MULTI_GAP_REGION_SIZE + 1; + args.row_pitch = 0; + args.slice_pitch = 0; + memcpy(kernarg_buffer + jj, &args, sizeof(queue_multi_gap_arg_t)); + } + + // Setup the dispatch packet. 
+ hsa_kernel_dispatch_packet_t dispatch_packet; + size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + memset(&dispatch_packet, 0, packet_size); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = MULTI_GAP_KERNEL_DATA_SIZE; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = MULTI_GAP_KERNEL_DATA_SIZE; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.kernarg_address = 0; + dispatch_packet.completion_signal.handle = 0; + + // Repeat the tests on the same queue + for (jj = 0; jj < MULTI_GAP_TEST_REPEAT; ++jj) { + // Clear the memory blocks + int kk; + for (kk = 0; kk < 3*MULTI_GAP_REGION_SIZE; ++kk) { + memset(data_blocks[kk], 0, MULTI_GAP_KERNEL_DATA_SIZE * sizeof(uint32_t)); + } + + // Reserve write index's for each region. 
+ uint64_t write_index[3]; + write_index[0] = hsa_queue_add_write_index_acquire(queue, MULTI_GAP_REGION_SIZE); + write_index[1] = hsa_queue_add_write_index_acquire(queue, MULTI_GAP_REGION_SIZE); + write_index[2] = hsa_queue_add_write_index_acquire(queue, MULTI_GAP_REGION_SIZE); + + // Create a completion signal for each region + hsa_signal_t signals[3]; + status = hsa_signal_create(MULTI_GAP_REGION_SIZE, 0, NULL, &signals[0]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_signal_create(MULTI_GAP_REGION_SIZE, 0, NULL, &signals[1]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_signal_create(MULTI_GAP_REGION_SIZE, 0, NULL, &signals[2]); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Populate the first region with packets, and ring the doorbell + dispatch_packet.completion_signal = signals[0]; + for (kk = 0; kk < MULTI_GAP_REGION_SIZE; ++kk) { + dispatch_packet.kernarg_address = (void*)(kernarg_buffer + kk); + enqueue_dispatch_packet_at((uint64_t) (write_index[0] + kk), queue, &dispatch_packet); + } + + // Populate the third region with packets, and ring the doorbell + dispatch_packet.completion_signal = signals[2]; + for (kk = 0; kk < MULTI_GAP_REGION_SIZE; ++kk) { + dispatch_packet.kernarg_address = (void*)(kernarg_buffer + 2 * MULTI_GAP_REGION_SIZE + kk); + enqueue_dispatch_packet_at((uint64_t) (write_index[2] + kk), queue, &dispatch_packet); + } + + // Wait for the first signal to indicate all kernels have finished + hsa_signal_wait_relaxed(signals[0], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Verify the second signal hasn't been modified + hsa_signal_value_t sig_value = hsa_signal_load_acquire(signals[1]); + ASSERT(MULTI_GAP_REGION_SIZE == sig_value); + + // Verify the third signal hasn't been modified + sig_value = hsa_signal_load_acquire(signals[2]); + ASSERT(MULTI_GAP_REGION_SIZE == sig_value); + + // Verify the data has been updated in the 1st region + verify_data(data_blocks, 1, 0); + + // Populate the second 
region with packets and ring the doorbell + dispatch_packet.completion_signal = signals[1]; + for (kk = 0; kk < MULTI_GAP_REGION_SIZE; ++kk) { + dispatch_packet.kernarg_address = (void*)(kernarg_buffer + MULTI_GAP_REGION_SIZE + kk); + enqueue_dispatch_packet_at((uint64_t) (write_index[1] + kk), queue, &dispatch_packet); + } + + // Wait for 2nd signal to indicate all kernels have finished + hsa_signal_wait_relaxed(signals[1], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Wait for 3rd signal to indicate all kernels have finished + hsa_signal_wait_relaxed(signals[2], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Verify the data has been updated in the 2nd region + verify_data(data_blocks + MULTI_GAP_REGION_SIZE, 1, 1); + + // Verify the data has been updated in the 3rd region + verify_data(data_blocks + 2 * MULTI_GAP_REGION_SIZE, 1, 2); + + // Destroy the signals + status = hsa_signal_destroy(signals[0]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_signal_destroy(signals[1]); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_signal_destroy(signals[2]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the memory addresses used by kernel arg + for (jj = 0; jj < 3*MULTI_GAP_REGION_SIZE; ++jj) { + status = hsa_memory_free(data_blocks[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(data_blocks); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + 
free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_multiple_dispatch.c b/src/core/queue/test_queue_multiple_dispatch.c new file mode 100644 index 0000000..cd614fe --- /dev/null +++ b/src/core/queue/test_queue_multiple_dispatch.c @@ -0,0 +1,361 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_multiple_dispatch + * Scope: Conformance + * + * Purpose: Verifies that several queues associated with + * a single agent can have AQL packets written to them concurrently, + * and that the AQL packets are properly executed. + * + * Test Description: + * 1) Query the list of agents for all agents that support the + * HSA_AGENT_FEATURE_DISPATCH queue feature. + * 2) For each agent create several queues. + * 3) Create different threads, one for each queue, that concurrently + * operate on the agent's set of queues, performing the following operations: + * a) Create several signals for use in dispatches, one signal for each + * planned dispatch. + * b) Allocate several small memory locations for use with the init_data + * kernel. + * c) Load and initialize the init_data kernel. + * d) Call hsa_queue_add_write_index_acquire to obtain a valid write + * index in the queue. + * f) Populate the packet at the write index with a dispatch packet + * that launches the init_data kernel. The packet and the kernel parameters + * should be configured such that one of the memory locations is + * initialized with a unique value associated with the dispatch. + * g) Repeat d through f for each planned dispatch. + * h) The thread should wait on all of the signals for each dispatch to finish. + * i) The thread should verify that the memory locations were properly initialized. 
+ * j) Repeat steps d through i several times, terminating only when the write + * index is equal to a set multiple of the specified queue size. + * 4) Repeat this test for each agent/queue set. + * + * Expected Results: All dispatches should succeed and the data should be initialized + * correctly. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define ARGUMENT_ALIGN_BYTES 16 +#define MULTI_QUEUE_DISP_DEFAULT_NUM_QUEUES 8 +#define MULTI_QUEUE_DISP_PACKET_COUNT 1024 + +typedef struct multi_dispatch_params { + hsa_agent_t* agent; + hsa_queue_t* queue; + symbol_record_t* symbol_record; +} multi_dispatch_params_t; + +// Work function for dispatch +void thread_proc_queue_multiple_dispatch(void* data) { + hsa_status_t status; + + multi_dispatch_params_t* param = (multi_dispatch_params_t*)data; + int num_dispatch_packets = MULTI_QUEUE_DISP_PACKET_COUNT; + + // Find a memory region in the global segment + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(*(param->agent), get_global_memory_region_fine_grained, &global_region); + ASSERT((uint64_t)-1 != global_region.handle); + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(*(param->agent), get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Allocate memory blocks used by the kernel arg + const int block_size = 1024; + uint32_t** data_blocks = (uint32_t**)malloc(num_dispatch_packets * sizeof(uint32_t*)); + int ii; + for (ii = 0; ii < num_dispatch_packets; ++ii) { + status = hsa_memory_allocate(global_region, block_size * sizeof(uint32_t), (void**) &data_blocks[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // The kernarg data structure + typedef struct __attribute__ ((aligned(ARGUMENT_ALIGN_BYTES))) multi_queue_dispatch_arg { + void* data; + uint32_t value; + uint32_t row_pitch; + 
uint32_t slice_pitch; + } multi_queue_dispatch_arg_t; + multi_queue_dispatch_arg_t args; + + // Allocate the kernel argument buffer from the correct region + multi_queue_dispatch_arg_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, num_dispatch_packets * sizeof(args), (void**)(&kernarg_buffer)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the signal with initial value of "num_dispatch_packets" + hsa_signal_t signal; + status = hsa_signal_create((hsa_signal_value_t)num_dispatch_packets, 0, NULL, &signal); + // status = hsa_signal_create((hsa_signal_value_t) 1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the dispatch packet. + hsa_kernel_dispatch_packet_t dispatch_packet; + size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.completion_signal = signal; + // dispatch_packet.completion_signal = signal; + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = block_size; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = block_size; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.kernel_object = param->symbol_record->kernel_object; + dispatch_packet.group_segment_size = param->symbol_record->group_segment_size; + dispatch_packet.private_segment_size = param->symbol_record->private_segment_size; + + // Dispatch the kernels + for (ii = 0; ii < num_dispatch_packets; ++ii) { + // setup the kernarg + args.data = data_blocks[ii]; + 
args.value = (uint32_t)ii; + args.row_pitch = (uint32_t)0; + args.slice_pitch = (uint32_t)0; + memcpy(kernarg_buffer + ii, &args, sizeof(args)); + + if (ii == (num_dispatch_packets-1)) { + dispatch_packet.completion_signal = signal; + } + + dispatch_packet.kernarg_address = (void*)(kernarg_buffer + ii); + + enqueue_dispatch_packet(param->queue, &dispatch_packet); + } + + // Wait until all the kernels are complete + hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Destroy the signal + hsa_signal_destroy(signal); + + // Verify all kernels are executed correctly + int valid = 1; + int failPacketIdx = 0; + int failDataIdx = 0; + for (ii = 0; ii < num_dispatch_packets && valid; ++ii) { + int jj; + for (jj = 0; jj < block_size && valid; ++jj) { + if (data_blocks[ii][jj] != ii) { + valid = 0; + failPacketIdx = ii; + failDataIdx = jj; + } + } + } + + ASSERT(valid == 1); + + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + for (ii = 0; ii < num_dispatch_packets; ++ii) { + status = hsa_memory_free(data_blocks[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(data_blocks); + + return; +} + +int test_queue_multiple_dispatch() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("init_data.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Check if a queue on this agent support QUEUE_TYPE_MULTI + hsa_queue_type_t queue_type; 
+ status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_TYPE, &queue_type); + ASSERT(HSA_STATUS_SUCCESS == status); + if (HSA_QUEUE_TYPE_MULTI != queue_type) { + continue; + } + + // Get the maximum number of queues that is supported on this agent + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Adjust the number of queues + uint32_t num_queues = MULTI_QUEUE_DISP_DEFAULT_NUM_QUEUES; + while (num_queues > queue_max) { + num_queues /= 2; + } + + if (num_queues < 1) { + // this agent does not support any queue + continue; + } + + // Find the maximum queue size + uint32_t queue_size_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size_max); + + // Create the queues + hsa_queue_t** queues = (hsa_queue_t**) malloc(num_queues * sizeof(hsa_queue_t*)); + int jj; + for (jj = 0; jj < num_queues; ++jj) { + status = hsa_queue_create(agent_list.agents[ii], queue_size_max, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queues[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__init_int_data_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + // The thread parameters + multi_dispatch_params_t* params = (multi_dispatch_params_t*)malloc(num_queues * sizeof(multi_dispatch_params_t)); + for (jj = 0; jj < num_queues; ++jj) { + params[jj].agent = agent_list.agents + ii; + params[jj].queue = queues[jj]; + params[jj].symbol_record = &symbol_record; + } + + // Create the test group + struct test_group* tg_queue_multiple_dispatch = test_group_create(num_queues); + for (jj = 0; jj < num_queues; ++jj) { + test_group_add(tg_queue_multiple_dispatch, &thread_proc_queue_multiple_dispatch, params + jj, 1); + } + + test_group_thread_create(tg_queue_multiple_dispatch); + test_group_start(tg_queue_multiple_dispatch); + test_group_wait(tg_queue_multiple_dispatch); + test_group_exit(tg_queue_multiple_dispatch); + test_group_destroy(tg_queue_multiple_dispatch); + + // Destroy the thread parameters + free(params); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queues + for (jj = 0; jj < num_queues; ++jj) { + status = hsa_queue_destroy(queues[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + free(queues); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_multiple_queues.c b/src/core/queue/test_queue_multiple_queues.c new file mode 100644 index 0000000..aa604bb --- /dev/null +++ b/src/core/queue/test_queue_multiple_queues.c @@ -0,0 +1,268 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * 
Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_multiple_queues + * Scope: Conformance + * + * Purpose: Verifies that a single agent can have multiple + * queues associated with it, and that all queues are initialized + * properly and functional. 
+ * + * Test Description: + * 1) For each agent in the system + * a) Query the agent to determine its HSA_AGENT_INFO_QUEUES_MAX + * parameter. + * b) Create that number of queues for the agent, using default + * values for other creation parameters, if possible. + * 2) Verify that each queue hsa a unique queue_id. Note that this + * id should be unique for every queue, not just queues belonging + * to an agent. + * 3) Check that the read pointer and write pointer are + * initialized to 0. + * 4) Create a simple kernel and verify that it will run on each queue. + * 5) Destroy all queues in the main thread with hsa_queue_destroy. + * + * Expected Results: All queues should be successfully created and + * all properties should be initialized correctly. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +static const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + +// Function to compare queue ids +int compare_queue_ids(const void *a, const void *b) { + return *((uint32_t*) a) - *((uint32_t*) b); +} + +int test_queue_multiple_queues() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int jj; + for (jj = 0; jj < agent_list.num_agents; ++jj) { + hsa_queue_type_t queue_type; + uint32_t queue_max; + uint32_t queue_max_size; + + // Get the supported queue type + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_QUEUE_TYPE, (void*) &queue_type); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the max number of queues for the agent + status = hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_QUEUES_MAX, (void*) &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the maximum size of the queues + status = hsa_agent_get_info(agent_list.agents[jj], 
HSA_AGENT_INFO_QUEUE_MAX_SIZE, (void*) &queue_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (queue_max_size == 0) { + continue; + } + + hsa_queue_t* queues[queue_max]; + memset(queues, 0, sizeof(queues)); + + // Create queues + int ii; + for (ii = 0; ii < queue_max; ++ii) { + status = hsa_queue_create(agent_list.agents[jj], queue_max_size, queue_type, NULL, NULL, UINT32_MAX, UINT32_MAX, &queues[ii]); + + // Break if the API returns because of resource issues + if (HSA_STATUS_ERROR_OUT_OF_RESOURCES == status) { + break; + } + + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Use the number of queues that were created + queue_max = ii; + + // Allocate an array to store ids of every queue + uint32_t ids[queue_max]; + memset(ids, 0, sizeof(ids)); + + for (ii = 0; ii < queue_max; ++ii) { + // Get the id of the queue and store into id array + ids[ii] = queues[ii]->id; + + // Check if read_pointer and write_pointer are initialized to zero + uint64_t read_pointer, write_pointer; + // read_pointer = hsa_queue_load_read_index_acquire(queues[ii]); + // ASSERT(read_pointer == 0); + + // write_pointer = hsa_queue_load_write_index_acquire(queues[ii]); + // ASSERT(write_pointer == 0); + } + + // Sort the queue ids + qsort(ids, queue_max, sizeof(uint32_t), compare_queue_ids); + + // Check that all the ids are unique + for (ii = 1; ii < queue_max; ++ii) { + ASSERT_MSG(ids[ii - 1] != ids[ii], "the ids of queues are not unique\n"); + } + + uint32_t features = 0; + hsa_agent_get_info(agent_list.agents[jj], HSA_AGENT_INFO_FEATURE, &features); + + // If the agent is capable if dispatch, attempt to dispatch a kernel + // to the set of queues + if ((features & HSA_AGENT_FEATURE_KERNEL_DISPATCH) != 0) { + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[jj], + 1, + 
&module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__no_op_kernel"; + status = get_executable_symbols(executable, agent_list.agents[jj], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a single signal to indicate completion + hsa_signal_t signal; + hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // For each queue, dispatch the kernel and wait for it to complete. + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 1; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.kernarg_address = 0; + dispatch_packet.completion_signal = signal; + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + + for (ii = 0; ii < queue_max; ++ii) { + // Initialize the signal value to 1. 
+ hsa_signal_store_relaxed(signal, 1); + + // Enqueue the packet + enqueue_dispatch_packet(queues[ii], &dispatch_packet); + + // Wait on the signal value to decrement + status = hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the signal. + status = hsa_signal_destroy(signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the queues + for (ii = 0; ii < queue_max; ++ii) { + status = hsa_queue_destroy(queues[ii]); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "hsa_queue_destory failed\n"); + } + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown the runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_size_create.c b/src/core/queue/test_queue_size_create.c new file mode 100644 index 0000000..f0ac0af --- /dev/null +++ b/src/core/queue/test_queue_size_create.c @@ -0,0 +1,230 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_size_create + * Scope: Conformance + + * Purpose: Verifies that all queue sizes that are a power of 2 between + * 2 and HSA_AGENT_INFO_QUEUE_MAX_SIZE can be used to create a valid + * queue on the agent. + * + * Test Description: + * 1) For each agent in the system query the HSA_AGENT_INFO_QUEUE_MAX_SIZE. 
+ * 2) For the agent create queues using size values between 2 and + * HSA_AGENT_INFO_QUEUE_MAX_SIZE, performing the following for each: + * a) Check that the write and read indexes are initialized to 0. + * b) Check that every packet in the queue buffer has its type + * initialized to HSA_PACKET_TYPE_INVALID. + * c) Dispatch a number of no-op kernels to the queue equal to the + * size of the queue, using a signal for the last dispatch to determine + * when the dispatches have finished. This is required if the queue + * has the HSA_QUEUE_FEATURE_DISPATCH feature. + * d) Destroy each queue. + * 3) Repeat for each agent. + */ + +#include +#include +#include +#include +#include + +int test_queue_size_create() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("no_op.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get the maximum number of queues that is supported on this agent + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + if (queue_max < 1) { + continue; + } + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + 
HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__no_op_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the queue's maximum size + uint32_t queue_max_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // The queue_size must be power of 2 + uint32_t queue_size; + for (queue_size = 2; queue_size <= queue_max_size; queue_size *= 2) { + // Create a queue with specified "queue_size" + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify that the read_index and write_index are initialized to zero + uint64_t read_index, write_index; + read_index = hsa_queue_load_read_index_acquire(queue); + ASSERT((uint64_t)0 == read_index); + write_index = hsa_queue_load_write_index_acquire(queue); + ASSERT((uint64_t)0 == write_index); + + // Create the signal with initial value of 1. 
+ // The last packet in the queue will set the signal + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Fill in info for the default dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 256; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 256; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.kernarg_address = 0; + + int jj; + for (jj = 0; jj < queue_size; ++jj) { + // Request a new packet ID + uint64_t write_index = hsa_queue_add_write_index_acquire(queue, 1); + + // Compute packet offset + hsa_kernel_dispatch_packet_t* queue_packet = (hsa_kernel_dispatch_packet_t*) queue->base_address + + write_index % queue->size; + + // Verify the packet header is initialized to INVALID + ASSERT((uint8_t) HSA_PACKET_TYPE_INVALID == *(uint8_t*)(&queue_packet->header)); + + if (jj == queue_size - 1) { + dispatch_packet.completion_signal = signal; + } + + enqueue_dispatch_packet_at(write_index, queue, &dispatch_packet); + } + + // Wait until all the kernels are complete + hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Destroy the 
signal + hsa_signal_destroy(signal); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_write_index_add_acq_rel_ordering.c b/src/core/queue/test_queue_write_index_add_acq_rel_ordering.c new file mode 100644 index 0000000..eb11730 --- /dev/null +++ b/src/core/queue/test_queue_write_index_add_acq_rel_ordering.c @@ -0,0 +1,252 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_add_write_index_acq_rel_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_queue_add_write_index_release and + * hsa_queue_add_write_index_acquire APIs enforce correct memory + * ordering. + * + * Test Description: + * 1) Query the platform for a list of agents that support the + * HSA_AGENT_FEATURE_KERNEL_DISPATCH feature. + * 2) For each agent, + * 3) Query the HSA_AGENT_INFO_QUEUES_MAX parameter. + * 4) Create HSA_AGENT_INFO_QUEUES_MAX queues. + * 5) Creates one thread that: + * a) Loops on the first queue's write_index value, setting the value + * to v + 1 when v % 2 == 0 with hsa_queue_cas_write_index_relaxed. + * b) Adds zero to the first queues value with hsa_queue_add_write_index_acq_rel. + * c) Reads the value of all of the queues but the first and last with + * hsa_queue_load_write_index_relaxed, verifies that they have the value of v, + * and then increments the value by one using hsa_queue_add_write_index_relaxed. 
+ * d) Loops on the last queue's write_index value, setting the value + * to v + 1 when v % 2 == 0 with hsa_queue_cas_write_index_relaxed. + * e) Adds zero to the last queues value with hsa_queue_add_write_index_acq_rel. + * f) Terminates when v = termination_value. + * 6) Create a second thread that: + * a) Loops on the first queue's write_index value, setting the value + * to v + 1 when v % 2 == 1 with hsa_queue_cas_write_index_relaxed. + * b) Adds zero to the first queues value with hsa_queue_add_write_index_acq_rel. + * c) Reads the value of all of the queues but the first and last with + * hsa_queue_load_write_index_relaxed, verifies that they have the value of v, + * and then increments the value by one using hsa_queue_add_write_index_relaxed. + * d) Loops on the last queue's write_index value, setting the value + * to v + 1 when v % 2 == 1 with hsa_queue_cas_write_index_relaxed. + * e) Adds zero to the last queues value with hsa_queue_add_write_index_acq_rel. + * f) Terminates when v = termination_value. + * + * Expected Results: The value of v should monotonically increase, and + * the queues that are operated on using the relaxed versions of the + * API should all have the expected value. + */ + +#include +#include +#include +#include +#include + +typedef struct write_index_thread_data_s { + hsa_queue_t** queues; + int num_queues; + uint64_t termination_value; +} write_index_thread_data_t; + +void thread_proc_write_index_add_acq_rel_even(void* data) { + write_index_thread_data_t* thread_data = (write_index_thread_data_t*)data; + int ii; + for (ii = 2; ii < thread_data->termination_value; ii += 2) { + uint64_t v = (uint64_t)ii; + + // This is to verify the write_indices of all queues in the middle. + uint64_t old_v = v - 1; + + // Increment the write_index of the first queue. 
+ while (old_v != hsa_queue_cas_write_index_relaxed( + thread_data->queues[0], old_v, v)) {} + + // A barrier to prevent memory reordering during runtime, by adding 0 + // to the write_index of the last queue (no change to its value). + hsa_queue_add_write_index_acq_rel(thread_data->queues[thread_data->num_queues - 1], 0); + + // Verify the write_indices of queues in the middle has been updated + // by the other thread. + // Also, increment the write_indices. + int jj; + for (jj = 1; jj < thread_data->num_queues - 1; ++jj) { + ASSERT(hsa_queue_load_write_index_relaxed(thread_data->queues[jj]) == old_v); + hsa_queue_add_write_index_relaxed(thread_data->queues[jj], 1); + } + + // A barrier to prevent memory reordering during runtime. + // Recover the write_index of the last queue. + hsa_queue_add_write_index_acq_rel(thread_data->queues[thread_data->num_queues - 1], 0); + + // This doesn't have to be in a loop. + // Increment the write_index of the first queue. + ASSERT(old_v == hsa_queue_cas_write_index_relaxed(thread_data->queues[thread_data->num_queues - 1], old_v, v)); + } +} + +void thread_proc_write_index_add_acq_rel_odd(void* data) { + // Since the write_index of all queues are initialized to 0, this thread + // will get to run first + write_index_thread_data_t* thread_data = (write_index_thread_data_t*)data; + int ii; + for (ii = 1; ii < thread_data->termination_value; ii += 2) { + uint64_t v = (uint64_t)ii; + + // This is to verify the write_indices of all queues in the middle. + uint64_t old_v = v - 1; + + // Increment the write_index of the last queue. + while (old_v != hsa_queue_cas_write_index_relaxed( + thread_data->queues[thread_data->num_queues - 1], old_v, v)) {} + + // A barrier to prevent memory reordering during runtime, by adding 0 + // to the write_index of the first queue (no change to its value). 
+ hsa_queue_add_write_index_acq_rel(thread_data->queues[0], 0); + + // Verify the write_indices of queues in the middle has been updated + // by the other thread. + // Also, increment the write_indices. + int jj; + for (jj = thread_data->num_queues - 2; jj >= 1; --jj) { + ASSERT(hsa_queue_load_write_index_relaxed(thread_data->queues[jj]) == old_v); + hsa_queue_add_write_index_relaxed(thread_data->queues[jj], 1); + } + + // a barrier to prevent memory reordering during runtime. + hsa_queue_add_write_index_acq_rel(thread_data->queues[0], 0); + + // This doesn't have to be in a loop. + // Increment the write_index of the first queue. + ASSERT(old_v == hsa_queue_cas_write_index_relaxed(thread_data->queues[0], old_v, v)); + } +} + +int test_queue_write_index_add_acq_rel_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get max number of queues + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (queue_max < 1) { + // This agent does not support any queue + continue; + } + + // Get max number of queues + uint32_t queue_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the queues + hsa_queue_t** queues = (hsa_queue_t**)malloc(queue_max * sizeof(hsa_queue_t*)); + + int num_queues = 0; + while (num_queues < queue_max) { + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, 
NULL, UINT32_MAX, UINT32_MAX, queues + num_queues); + if (HSA_STATUS_ERROR_OUT_OF_RESOURCES == status) { + break; + } + ASSERT(HSA_STATUS_SUCCESS == status); + ++num_queues; + } + + // Thread data + write_index_thread_data_t thread_data; + thread_data.queues = queues; + thread_data.num_queues = num_queues; + // Choose a termination_value >= 2 + thread_data.termination_value = 256; + + // Create the test group + struct test_group* tg = test_group_create(2); + test_group_add(tg, &thread_proc_write_index_add_acq_rel_even, &thread_data, 1); + test_group_add(tg, &thread_proc_write_index_add_acq_rel_odd, &thread_data, 1); + test_group_thread_create(tg); + test_group_start(tg); + test_group_wait(tg); + test_group_exit(tg); + test_group_destroy(tg); + + // Destroy queues + int jj; + for (jj = 0; jj < num_queues; ++jj) { + status = hsa_queue_destroy(queues[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(queues); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_write_index_add_acquire_release_ordering.c b/src/core/queue/test_queue_write_index_add_acquire_release_ordering.c new file mode 100644 index 0000000..d5de817 --- /dev/null +++ b/src/core/queue/test_queue_write_index_add_acquire_release_ordering.c @@ -0,0 +1,251 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_add_write_index_acquire_release_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_queue_add_write_index_release and + * hsa_queue_add_write_index_acquire APIs enforce correct memory + * ordering. + * + * Test Description: + * 1) Query the platform for a list of agents that support the + * HSA_AGENT_FEATURE_KERNEL_DISPATCH feature. + * 2) For each agent, + * 3) Query the HSA_AGENT_INFO_QUEUES_MAX parameter.
+ * 4) Create HSA_AGENT_INFO_QUEUES_MAX queues. + * 5) Creates one thread that: + * a) Loops on the first queue's write_index value, setting the value + * to v + 1 when v % 2 == 0 with hsa_queue_cas_write_index_relaxed. + * b) Adds zero to the first queues value with hsa_queue_add_write_index_acquire. + * c) Reads the value of all of the queues but the first and last with + * hsa_queue_load_write_index_relaxed, verifies that they have the value of v, + * and then increments the value by one using hsa_queue_add_write_index_relaxed. + * d) Loops on the last queue's write_index value, setting the value + * to v + 1 when v % 2 == 0 with hsa_queue_cas_write_index_relaxed. + * e) Adds zero to the last queues value with hsa_queue_add_write_index_release. + * f) Terminates when v = termination_value. + * 6) Create a second thread that: + * a) Loops on the first queue's write_index value, setting the value + * to v + 1 when v % 2 == 1 with hsa_queue_cas_write_index_relaxed. + * b) Adds zero to the first queues value with hsa_queue_add_write_index_acquire. + * c) Reads the value of all of the queues but the first and last with + * hsa_queue_load_write_index_relaxed, verifies that they have the value of v, + * and then increments the value by one using hsa_queue_add_write_index_relaxed. + * d) Loops on the last queue's write_index value, setting the value + * to v + 1 when v % 2 == 1 with hsa_queue_cas_write_index_relaxed. + * e) Adds zero to the last queues value with hsa_queue_add_write_index_release. + * f) Terminates when v = termination_value. + * + * Expected Results: The value of v should monotonically increase, and + * the queues that are operated on using the relaxed versions of the + * API should all have the expected value. 
+ */ + +#include +#include +#include +#include +#include + +typedef struct write_index_thread_data_s { + hsa_queue_t** queues; + int num_queues; + uint64_t termination_value; +} write_index_thread_data_t; + +void thread_proc_write_index_add_acquire_release_even(void* data) { + write_index_thread_data_t* thread_data = (write_index_thread_data_t*)data; + int ii; + for (ii = 2; ii < thread_data->termination_value; ii += 2) { + uint64_t v = (uint64_t)ii; + + // This is to verify the write_indices of all queues in the middle. + uint64_t old_v = v - 1; + + // Increment the write_index of the first queue. + while (old_v != hsa_queue_cas_write_index_relaxed( + thread_data->queues[0], old_v, v)) {} + + // A barrier to prevent memory reordering during runtime, by adding 0 + // to the write_index of the last queue (no change to its value). + hsa_queue_add_write_index_acquire(thread_data->queues[thread_data->num_queues - 1], 0); + + // Verify the write_indices of queues in the middle has been updated + // by the other thread. + // Also, increment the write_indices. + int jj; + for (jj = 1; jj < thread_data->num_queues - 1; ++jj) { + ASSERT(hsa_queue_load_write_index_relaxed(thread_data->queues[jj]) == old_v); + hsa_queue_add_write_index_relaxed(thread_data->queues[jj], 1); + } + + // A barrier to prevent memory reordering during runtime. + // Recover the write_index of the last queue. + hsa_queue_add_write_index_release(thread_data->queues[thread_data->num_queues - 1], 0); + + // This doesn't have to be in a loop. + // Increment the write_index of the first queue. 
+ ASSERT(old_v == hsa_queue_cas_write_index_relaxed(thread_data->queues[thread_data->num_queues - 1], old_v, v)); + } +} + +void thread_proc_write_index_add_acquire_release_odd(void* data) { + // Since the write_index of all queues are initialized to 0, this thread + // will get to run first + write_index_thread_data_t* thread_data = (write_index_thread_data_t*)data; + int ii; + for (ii = 1; ii < thread_data->termination_value; ii += 2) { + uint64_t v = (uint64_t)ii; + + // This is to verify the write_indices of all queues in the middle. + uint64_t old_v = v - 1; + + // Increment the write_index of the last queue. + while (old_v != hsa_queue_cas_write_index_relaxed( + thread_data->queues[thread_data->num_queues - 1], old_v, v)) {} + + // A barrier to prevent memory reordering during runtime, by adding 0 + // to the write_index of the first queue (no change to its value). + hsa_queue_add_write_index_acquire(thread_data->queues[0], 0); + + // Verify the write_indices of queues in the middle has been updated + // by the other thread. + // Also, increment the write_indices. + int jj; + for (jj = thread_data->num_queues - 2; jj >= 1; --jj) { + ASSERT(hsa_queue_load_write_index_relaxed(thread_data->queues[jj]) == old_v); + hsa_queue_add_write_index_relaxed(thread_data->queues[jj], 1); + } + + // A barrier to prevent memory reordering during runtime. + hsa_queue_add_write_index_release(thread_data->queues[0], 0); + + // This doesn't have to be in a loop. + // Increment the write_index of the first queue. 
+ ASSERT(old_v == hsa_queue_cas_write_index_relaxed(thread_data->queues[0], old_v, v)); + } +} + +int test_queue_write_index_add_acquire_release_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get max number of queues + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (queue_max < 1) { + // This agent does not support any queue + continue; + } + + // Get the queue size + uint32_t queue_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the queues + hsa_queue_t** queues = (hsa_queue_t**)malloc(queue_max * sizeof(hsa_queue_t*)); + + int num_queues = 0; + while (num_queues < queue_max) { + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, queues + num_queues); + if (HSA_STATUS_ERROR_OUT_OF_RESOURCES == status) { + break; + } + ASSERT(HSA_STATUS_SUCCESS == status); + ++num_queues; + } + + // Thread data + write_index_thread_data_t thread_data; + thread_data.queues = queues; + thread_data.num_queues = num_queues; + // Choose a termination_value >= 2 + thread_data.termination_value = 256; + + // Create the test group + struct test_group* tg = test_group_create(2); + test_group_add(tg, &thread_proc_write_index_add_acquire_release_even, &thread_data, 1); + test_group_add(tg, &thread_proc_write_index_add_acquire_release_odd, &thread_data, 
1); + test_group_thread_create(tg); + test_group_start(tg); + test_group_wait(tg); + test_group_exit(tg); + test_group_destroy(tg); + + int jj; + for (jj = 0; jj < num_queues; ++jj) { + status = hsa_queue_destroy(queues[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(queues); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_write_index_add_atomic.c b/src/core/queue/test_queue_write_index_add_atomic.c new file mode 100644 index 0000000..d5131b8 --- /dev/null +++ b/src/core/queue/test_queue_write_index_add_atomic.c @@ -0,0 +1,186 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_write_index_add_atomic + * Scope: Conformance + * + * Purpose: Verifies that the hsa_queue_add_write_index operations are atomic, + * and 'torn' adds do not occur when these APIs are executed concurrently. + * + * Test Description: + * 1) Create a queue, with an initial write index value of 0. + * 2) Create 4 threads, that + * a) Call hsa_queue_add_write_index in a loop, incrementing the + * write index by 1. + * b) Each performs 1024*1024 additions to the write index value. + * 3) After the threads have finished, check the final index value. + * 4) Repeat several times. + * 5) Repeat for all versions of the hsa_queue_add_write_index APIs, i.e. acquire, + * release, relaxed and acq_rel memory ordering versions. + * + * Expected Results: The final write index value should equal the sum of + * all of the additions from all threads.
+ */ + +#include +#include +#include +#include +#include + +#define QUEUE_WRITE_INDEX_NUM_OF_ADD_ATOMIC 1*1024*1024 + +typedef struct write_index_add_atomic_data_s { + hsa_queue_t* queue; + int memory_ordering_type; +} write_index_add_atomic_data_t; + +void thread_proc_write_index_add_atomic(void* data) { + write_index_add_atomic_data_t* thread_data = (write_index_add_atomic_data_t*)data; + int ii; + for (ii = 0; ii < QUEUE_WRITE_INDEX_NUM_OF_ADD_ATOMIC; ++ii) { + switch (thread_data->memory_ordering_type) { + case 0: + hsa_queue_add_write_index_acq_rel(thread_data->queue, 1); + break; + case 1: + hsa_queue_add_write_index_acquire(thread_data->queue, 1); + break; + case 2: + hsa_queue_add_write_index_relaxed(thread_data->queue, 1); + break; + case 3: + hsa_queue_add_write_index_release(thread_data->queue, 1); + break; + default: + break; + } + } +} + +int test_queue_write_index_add_atomic(void) { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get max number of queues + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (queue_max < 1) { + // This agent does not support any queues + continue; + } + + // Get max number of queues + uint32_t queue_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, 
NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + int jj; + const int repeat = 2; + for (jj = 0; jj < repeat; ++jj) { + int memory_ordering_type; + for (memory_ordering_type = 0; memory_ordering_type < 4; ++memory_ordering_type) { + // Thread data + write_index_add_atomic_data_t thread_data; + thread_data.queue = queue; + thread_data.memory_ordering_type = memory_ordering_type; + + // Create the test group + const int num_threads = 4; + struct test_group* tg = test_group_create(num_threads); + int kk; + for (kk = 0; kk < num_threads; ++kk) { + test_group_add(tg, &thread_proc_write_index_add_atomic, &thread_data, 1); + } + test_group_thread_create(tg); + test_group_start(tg); + test_group_wait(tg); + test_group_exit(tg); + test_group_destroy(tg); + + // Verify the write_index + uint64_t write_index = hsa_queue_load_write_index_relaxed(queue); + uint64_t expected = (uint64_t)(QUEUE_WRITE_INDEX_NUM_OF_ADD_ATOMIC * num_threads); + ASSERT(expected == write_index); + + // Restore the write_index of the queue + hsa_queue_store_write_index_release(queue, 0); + } + } + + // Destroy queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_write_index_cas_acq_rel_ordering.c b/src/core/queue/test_queue_write_index_cas_acq_rel_ordering.c new file mode 100644 index 0000000..479a374 --- /dev/null +++ b/src/core/queue/test_queue_write_index_cas_acq_rel_ordering.c @@ -0,0 +1,231 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_cas_write_index_acq_rel_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_queue_cas_write_index_acq_rel API + * enforces correct memory ordering. + * + * + * Test Description: + * 1) Query the platform for a list of agents that support the + * HSA_AGENT_FEATURE_KERNEL_DISPATCH feature. 
+ * 2) For each agent, + * 3) Query the HSA_AGENT_INFO_QUEUES_MAX parameter. + * 4) Create HSA_AGENT_INFO_QUEUES_MAX queues. + * 5) Creates one thread that: + * a) Loops on the first queue's write_index value, setting the value + * to v + 1 when v % 2 == 0 with hsa_queue_cas_write_index_acq_rel. + * b) Attempts to set all the other queue's except the first and last, + * write index to v + 1 with hsa_queue_cas_write_index_relaxed, + * expecting the API to return v. + * c) Loops on the last queue's write_index value, setting the value + * to v + 1 when v % 2 == 0 with hsa_queue_cas_write_index_acq_rel. + * d) Terminates when v = termination_value. + * 6) Create a second thread that: + * a) Loops on the first queue's write_index value, setting the value + * to v + 1 when v % 2 == 1 with hsa_queue_cas_write_index_acq_rel. + * b) Attempts to set all the other queue's except the first and last, + * write index to v + 1 with hsa_queue_cas_write_index_relaxed, + * expecting the API to return v. + * c) Loops on the last queue's write_index value, setting the value + * to v + 1 when v % 2 == 1 with hsa_queue_cas_write_index_acq_rel. + * d) Terminates when v = termination_value+1. + * + * Expected Results: The value of v should monotonically increase, and + * the queues that are operated on using the relaxed versions of the + * API should all have the expected value. + */ + +#include +#include +#include +#include +#include + +typedef struct write_index_cas_thread_data_s { + hsa_queue_t** queues; + int num_queues; + uint64_t termination_value; +} write_index_cas_thread_data_t; + +void thread_proc_write_index_cas_acq_rel_even(void* data) { + write_index_cas_thread_data_t* thread_data = (write_index_cas_thread_data_t*)data; + int ii; + for (ii = 2; ii < thread_data->termination_value; ii += 2) { + uint64_t v = (uint64_t)ii; + + // This is to verify the write_indices of all queues in the middle. + uint64_t old_v = v - 1; + + // Increment the write_index of the first queue. 
+ while (old_v != hsa_queue_cas_write_index_acq_rel( + thread_data->queues[0], old_v, v)) {} + + // Verify the write_indices of queues in the middle has been updated + // by the other thread. + // Also, increment the write_indices. + int jj; + for (jj = 1; jj < thread_data->num_queues - 1; ++jj) { + ASSERT(hsa_queue_cas_write_index_relaxed(thread_data->queues[jj], old_v, v) == old_v); + } + + // This doesn't have to be in a loop. + // Increment the write_index of the first queue. + ASSERT(old_v == hsa_queue_cas_write_index_acq_rel(thread_data->queues[thread_data->num_queues - 1], old_v, v)); + } +} + +void thread_proc_write_index_cas_acq_rel_odd(void* data) { + // Since the write_index of all queues are initialized to 0, this thread + // will get to run first + write_index_cas_thread_data_t* thread_data = (write_index_cas_thread_data_t*)data; + int ii; + for (ii = 1; ii < thread_data->termination_value; ii += 2) { + uint64_t v = (uint64_t)ii; + + // This is to verify the write_indices of all queues in the middle. + uint64_t old_v = v - 1; + + // Increment the write_index of the last queue. + while (old_v != hsa_queue_cas_write_index_acq_rel( + thread_data->queues[thread_data->num_queues - 1], old_v, v)) {} + + // Verify the write_indices of queues in the middle has been updated + // by the other thread. + // Also, increment the write_indices. + int jj; + for (jj = thread_data->num_queues - 2; jj >= 1; --jj) { + ASSERT(hsa_queue_cas_write_index_relaxed(thread_data->queues[jj], old_v, v) == old_v); + } + + // This doesn't have to be in a loop. + // Increment the write_index of the first queue. 
+ ASSERT(old_v == hsa_queue_cas_write_index_acq_rel(thread_data->queues[0], old_v, v)); + } +} + +int test_queue_write_index_cas_acq_rel_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get max number of queues + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (queue_max < 1) { + // This agent does not support any queue + continue; + } + + // Get the maximum queue size + uint32_t queue_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the queues + hsa_queue_t** queues = (hsa_queue_t**)malloc(queue_max * sizeof(hsa_queue_t*)); + + int num_queues = 0; + while (num_queues < queue_max) { + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, queues + num_queues); + if (HSA_STATUS_ERROR_OUT_OF_RESOURCES == status) { + break; + } + ASSERT(HSA_STATUS_SUCCESS == status); + ++num_queues; + } + + // Thread data + write_index_cas_thread_data_t thread_data; + thread_data.queues = queues; + thread_data.num_queues = num_queues; + // Choose a termination_value >= 2 + thread_data.termination_value = 256; + + // Create the test group + struct test_group* tg = test_group_create(2); + test_group_add(tg, &thread_proc_write_index_cas_acq_rel_even, &thread_data, 1); + test_group_add(tg, &thread_proc_write_index_cas_acq_rel_odd, &thread_data, 1); + 
test_group_thread_create(tg); + test_group_start(tg); + test_group_wait(tg); + test_group_exit(tg); + test_group_destroy(tg); + + // Destroy the queues + int jj; + for (jj = 0; jj < num_queues; ++jj) { + status = hsa_queue_destroy(queues[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(queues); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_write_index_cas_acquire_release_ordering.c b/src/core/queue/test_queue_write_index_cas_acquire_release_ordering.c new file mode 100644 index 0000000..b1fb41b --- /dev/null +++ b/src/core/queue/test_queue_write_index_cas_acquire_release_ordering.c @@ -0,0 +1,229 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_cas_write_index_acquire_release_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_queue_cas_write_index_release and + * hsa_queue_cas_write_index_acquire APIs enforce correct memory + * ordering. + * + * Test Description: + * 1) Query the platform for a list of agents that support the + * HSA_AGENT_FEATURE_KERNEL_DISPATCH feature. + * 2) For each agent, + * 3) Query the HSA_AGENT_INFO_QUEUES_MAX parameter. + * 4) Create HSA_AGENT_INFO_QUEUES_MAX queues. + * 5) Creates one thread that: + * a) Loops on the first queue's write_index value, setting the value + * to v + 1 when v % 2 == 0 with hsa_queue_cas_write_index_acquire. + * b) Attempts to set all the other queue's except the first and last, + * write index to v + 1 with hsa_queue_cas_write_index_relaxed, + * expecting the API to return v. + * c) Loops on the last queue's write_index value, setting the value + * to v + 1 when v % 2 == 0 with hsa_queue_cas_write_index_release. + * d) Terminates when v = termination_value. 
+ * 6) Create a second thread that: + * a) Loops on the first queue's write_index value, setting the value + * to v + 1 when v % 2 == 1 with hsa_queue_cas_write_index_acquire. + * b) Attempts to set all the other queue's except the first and last, + * write index to v + 1 with hsa_queue_cas_write_index_relaxed, + * expecting the API to return v. + * c) Loops on the last queue's write_index value, setting the value + * to v + 1 when v % 2 == 1 with hsa_queue_cas_write_index_release. + * d) Terminates when v = termination_value+1. + * + * Expected Results: The value of v should monotonically increase, and + * the queues that are operated on using the relaxed versions of the + * API should all have the expected value. + */ + +#include +#include +#include +#include +#include + +typedef struct write_index_cas_thread_data_s { + hsa_queue_t** queues; + int num_queues; + uint64_t termination_value; +} write_index_cas_thread_data_t; + +void thread_proc_write_index_cas_acquire_release_even(void* data) { + write_index_cas_thread_data_t* thread_data = (write_index_cas_thread_data_t*)data; + int ii; + for (ii = 2; ii < thread_data->termination_value; ii += 2) { + uint64_t v = (uint64_t)ii; + + // This is to verify the write_indices of all queues in the middle. + uint64_t old_v = v - 1; + + // Increment the write_index of the first queue. + while (old_v != hsa_queue_cas_write_index_acquire( + thread_data->queues[0], old_v, v)) {} + + // Verify the write_indices of queues in the middle has been updated + // by the other thread. + // Also, increment the write_indices. + int jj; + for (jj = 1; jj < thread_data->num_queues - 1; ++jj) { + ASSERT(hsa_queue_cas_write_index_relaxed(thread_data->queues[jj], old_v, v) == old_v); + } + + // This doesn't have to be in a loop. + // Increment the write_index of the first queue. 
+ ASSERT(old_v == hsa_queue_cas_write_index_release(thread_data->queues[thread_data->num_queues - 1], old_v, v)); + } +} + +void thread_proc_write_index_cas_acquire_release_odd(void* data) { + // Since the write_index of all queues are initialized to 0, this thread + // will get to run first + write_index_cas_thread_data_t* thread_data = (write_index_cas_thread_data_t*)data; + int ii; + for (ii = 1; ii < thread_data->termination_value; ii += 2) { + uint64_t v = (uint64_t)ii; + + // This is to verify the write_indices of all queues in the middle. + uint64_t old_v = v - 1; + + // Increment the write_index of the last queue. + while (old_v != hsa_queue_cas_write_index_acquire( + thread_data->queues[thread_data->num_queues - 1], old_v, v)) {} + + // Verify the write_indices of queues in the middle has been updated + // by the other thread. + // And increment the write_indices. + int jj; + for (jj = thread_data->num_queues - 2; jj >= 1; --jj) { + ASSERT(hsa_queue_cas_write_index_relaxed(thread_data->queues[jj], old_v, v) == old_v); + } + + // This doesn't have to be in a loop. + // Increment the write_index of the first queue. 
+ ASSERT(old_v == hsa_queue_cas_write_index_release(thread_data->queues[0], old_v, v)); + } +} + +int test_queue_write_index_cas_acquire_release_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get max number of queues + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (queue_max < 1) { + // This agent does not support any queue + continue; + } + + uint32_t queue_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the queues + hsa_queue_t** queues = (hsa_queue_t**)malloc(queue_max * sizeof(hsa_queue_t*)); + + int num_queues = 0; + while (num_queues < queue_max) { + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, queues + num_queues); + if (HSA_STATUS_ERROR_OUT_OF_RESOURCES == status) { + break; + } + ASSERT(HSA_STATUS_SUCCESS == status); + ++num_queues; + } + + // Thread data + write_index_cas_thread_data_t thread_data; + thread_data.queues = queues; + thread_data.num_queues = num_queues; + // Choose a termination_value >= 2 + thread_data.termination_value = 256; + + // Create the test group + struct test_group* tg = test_group_create(2); + test_group_add(tg, &thread_proc_write_index_cas_acquire_release_even, &thread_data, 1); + test_group_add(tg, &thread_proc_write_index_cas_acquire_release_odd, &thread_data, 1); + 
test_group_thread_create(tg); + test_group_start(tg); + test_group_wait(tg); + test_group_exit(tg); + test_group_destroy(tg); + + int jj; + for (jj = 0; jj < num_queues; ++jj) { + status = hsa_queue_destroy(queues[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(queues); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_write_index_cas_atomic.c b/src/core/queue/test_queue_write_index_cas_atomic.c new file mode 100644 index 0000000..34b7e38 --- /dev/null +++ b/src/core/queue/test_queue_write_index_cas_atomic.c @@ -0,0 +1,196 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_cas_write_index_atomic + * Scope: Conformance + * + * Purpose: Verifies that the hsa_queue_cas_write_index operations are atomic, + * and 'torn' compare and swaps do not occur when these APIs are executed + * concurrently. + * + * Test Description: + * 1) Create a queue; its write index starts at 0. + * 2) Create 4 threads, that + * a) Call hsa_queue_cas_write_index in a loop, comparing the value + * to an expected value and then advancing the value to the + * next value in the cycle. + * b) Thread 0 will exchange value to value + 1 when value%4=0 + * c) Thread 1 will exchange value to value + 1 when value%4=1 + * d) Thread 2 will exchange value to value + 1 when value%4=2 + * e) Thread 3 will exchange value to value + 1 when value%4=3 + * 3) Run the threads for about one million exchanges in total, with no + * explicit synchronization between the threads. + * 4) Repeat for all versions of the hsa_queue_cas_write_index APIs, i.e. acquire, + * release, relaxed and acq_rel memory ordering versions. + * + * Expected Results: The value of the write index should increase monotonically, and + * advance through all expected values. 
+ */ + +#include +#include +#include +#include +#include + +#define QUEUE_WRITE_INDEX_NUM_OF_CAS_ATOMIC 1*1024*1024 + +typedef struct write_index_cas_thread_data_s { + hsa_queue_t* queue; + int thread_index; + int num_threads; + uint64_t termination_value; + int memory_ordering_type; +} write_index_cas_thread_data_t; + +void thread_proc_write_index_cas_atomic(void* data) { + write_index_cas_thread_data_t* thread_data = (write_index_cas_thread_data_t*)data; + + int ii; + for (ii = thread_data->thread_index; ii < thread_data->termination_value; ii += thread_data->num_threads) { + switch (thread_data->memory_ordering_type) { + case 0: + while ((uint64_t)ii != hsa_queue_cas_write_index_acq_rel(thread_data->queue, ii, ii + 1)) + {} + break; + case 1: + while ((uint64_t)ii != hsa_queue_cas_write_index_acquire(thread_data->queue, ii, ii + 1)) + {} + break; + case 2: + while ((uint64_t)ii != hsa_queue_cas_write_index_relaxed(thread_data->queue, ii, ii + 1)) + {} + break; + case 3: + while ((uint64_t)ii != hsa_queue_cas_write_index_release(thread_data->queue, ii, ii + 1)) + {} + break; + } + } +} + +int test_queue_write_index_cas_atomic() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get max number of queues + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (queue_max < 1) { + // This agent does not support any queues + continue; + } + + // Get the queue size + uint32_t queue_size; + status = 
hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Repeat for all four versions of hsa_queue_cas_write_index + int memory_ordering_type; + for (memory_ordering_type = 0; memory_ordering_type < 4; ++memory_ordering_type) { + // Thread data + const int num_threads = 4; + write_index_cas_thread_data_t thread_data[num_threads]; + + // Create the test group + struct test_group* tg = test_group_create(num_threads); + int jj; + for (jj = 0; jj < num_threads; ++jj) { + thread_data[jj].queue = queue; + thread_data[jj].thread_index = jj; + thread_data[jj].num_threads = num_threads; + thread_data[jj].memory_ordering_type = memory_ordering_type; + thread_data[jj].termination_value = QUEUE_WRITE_INDEX_NUM_OF_CAS_ATOMIC; + test_group_add(tg, &thread_proc_write_index_cas_atomic, thread_data + jj, 1); + } + test_group_thread_create(tg); + test_group_start(tg); + test_group_wait(tg); + test_group_exit(tg); + test_group_destroy(tg); + + // Verify the write_index + uint64_t write_index = hsa_queue_load_write_index_relaxed(queue); + uint64_t expected = (uint64_t)(QUEUE_WRITE_INDEX_NUM_OF_CAS_ATOMIC); + ASSERT(expected == write_index); + + // Restore the write_index of the queue + hsa_queue_store_write_index_release(queue, 0); + } + + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/queue/test_queue_write_index_load_store_atomic.c b/src/core/queue/test_queue_write_index_load_store_atomic.c new file mode 100644 index 0000000..418dbd8 --- /dev/null +++ b/src/core/queue/test_queue_write_index_load_store_atomic.c @@ -0,0 +1,218 
@@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: queue_write_index_load_store_atomic + * Scope: Conformance + * + * Purpose: Verifies that the hsa_queue_load_write_index and + * hsa_queue_store_write_index operations are atomic, and 'torn' loads or + * stores do not occur when these APIs are executed concurrently. + * + * Test Description: + * 1) Create a queue. + * 2) Create 2 threads, that + * a) Update the queue write index value, the first to 0 and the second + * to UINT64_MAX, using a hsa_queue_store_write_index operation. + * b) Packets should not be initialized for dispatch. + * 3) Create 2 threads, that + * a) Read the queue write index, and check if it is 0 or UINT64_MAX, + * using a hsa_queue_load_write_index operation. + * 4) Run the threads for many iterations of loads and stores + * (QUEUE_WRITE_INDEX_NUM_OF_LOAD_STORE_ATOMIC per thread), with no + * explicit synchronization between the threads. + * 5) Repeat for all versions of the write index load and store APIs, i.e. + * for stores use both hsa_queue_store_write_index_release and + * hsa_queue_store_write_index_relaxed, and for loads use both + * hsa_queue_load_write_index_acquire and hsa_queue_load_write_index_relaxed. + * + * Expected Results: The reading threads should only see two possible write index + * values, 0 or UINT64_MAX. 
+ */ + +#include +#include +#include +#include +#include + +#define QUEUE_WRITE_INDEX_NUM_OF_LOAD_STORE_ATOMIC 1*128*1024 + +uint64_t STORE_VALUE; + +typedef struct write_index_load_atomic_thread_data_s { + hsa_queue_t* queue; + uint64_t num_iterations; + int memory_ordering_type; +} write_index_load_atomic_thread_data_t; + +typedef struct write_index_store_atomic_thread_data_s { + hsa_queue_t* queue; + uint64_t store_value; + uint64_t num_iterations; + int memory_ordering_type; +} write_index_store_atomic_thread_data_t; + +void thread_proc_write_index_load_atomic(void* data) { + write_index_load_atomic_thread_data_t* thread_data = + (write_index_load_atomic_thread_data_t*)data; + + int ii; + for (ii = 0; ii < thread_data->num_iterations; ++ii) { + uint64_t write_index; + if (0 == thread_data->memory_ordering_type) { + write_index = hsa_queue_load_write_index_acquire(thread_data->queue); + } else if (1 == thread_data->memory_ordering_type) { + write_index = hsa_queue_load_write_index_relaxed(thread_data->queue); + } else { + ASSERT(0); + } + // The only two possible values + ASSERT(0 == write_index || STORE_VALUE == write_index); + } +} + +void thread_proc_write_index_store_atomic(void* data) { + write_index_store_atomic_thread_data_t* thread_data = + (write_index_store_atomic_thread_data_t*)data; + + int ii; + for (ii = 0; ii < thread_data->num_iterations; ++ii) { + uint64_t write_index; + if (0 == thread_data->memory_ordering_type) { + hsa_queue_store_write_index_release(thread_data->queue, thread_data->store_value); + } else if (1 == thread_data->memory_ordering_type) { + hsa_queue_store_write_index_relaxed(thread_data->queue, thread_data->store_value); + } else { + ASSERT(0); + } + } +} + +int test_queue_write_index_load_store_atomic() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check 
if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get max number of queues + uint32_t queue_max; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUES_MAX, &queue_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (queue_max < 1) { + // This agent does not support any queue + continue; + } + + // Get queue size + uint32_t queue_size; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Use a 64-bit value to test the atomicity + STORE_VALUE = UINT64_MAX; + + // Repeat for all four versions of hsa_queue_cas_write_index + int memory_ordering_type; + for (memory_ordering_type = 0; memory_ordering_type < 2; ++memory_ordering_type) { + // Thread data + write_index_load_atomic_thread_data_t load_thread_data[2]; + write_index_store_atomic_thread_data_t store_thread_data[2]; + load_thread_data[0].queue = queue; + load_thread_data[0].num_iterations = QUEUE_WRITE_INDEX_NUM_OF_LOAD_STORE_ATOMIC; + load_thread_data[0].memory_ordering_type = memory_ordering_type; + load_thread_data[1].queue = queue; + load_thread_data[1].num_iterations = QUEUE_WRITE_INDEX_NUM_OF_LOAD_STORE_ATOMIC; + load_thread_data[1].memory_ordering_type = memory_ordering_type; + store_thread_data[0].queue = queue; + store_thread_data[0].store_value = 0; + store_thread_data[0].num_iterations = QUEUE_WRITE_INDEX_NUM_OF_LOAD_STORE_ATOMIC; + store_thread_data[0].memory_ordering_type = memory_ordering_type; + store_thread_data[1].queue = queue; + 
store_thread_data[1].store_value = STORE_VALUE; + store_thread_data[1].num_iterations = QUEUE_WRITE_INDEX_NUM_OF_LOAD_STORE_ATOMIC; + store_thread_data[1].memory_ordering_type = memory_ordering_type; + + // Create the test group + struct test_group* tg = test_group_create(4); + test_group_add(tg, &thread_proc_write_index_load_atomic, load_thread_data, 1); + test_group_add(tg, &thread_proc_write_index_load_atomic, load_thread_data + 1, 1); + test_group_add(tg, &thread_proc_write_index_store_atomic, store_thread_data, 1); + test_group_add(tg, &thread_proc_write_index_store_atomic, store_thread_data + 1, 1); + test_group_thread_create(tg); + test_group_start(tg); + test_group_wait(tg); + test_group_exit(tg); + test_group_destroy(tg); + } + + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/signals/config.h b/src/core/signals/config.h new file mode 100644 index 0000000..94d1c6d --- /dev/null +++ b/src/core/signals/config.h @@ -0,0 +1,67 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#ifndef _CONFIG_H_ +#define _CONFIG_H_ + +#define GROUP_SIZE 4 +#define TEST_COUNT 10 +#define OP_COUNT 1024 + +#define NUM_ITERATION 128 +#define NUM_SIGNAL 128 // 256 +#define NUM_THREADS 32 +#define NUM_X 128 // 1024 +#define NUM_ITER_MEM_ORD 100 + +#ifdef HSA_LARGE_MODEL +#define FIRST_BIT 0x8000000000000000 +#else +#define FIRST_BIT 0x80000000 +#endif +#define LAST_BIT 0x1 +#define ALL_BIT -1 + +#endif // _CONFIG_H_ diff --git a/src/core/signals/hsa_signals.c b/src/core/signals/hsa_signals.c new file mode 100644 index 0000000..021ea01 --- /dev/null +++ b/src/core/signals/hsa_signals.c @@ -0,0 +1,231 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ *  - Neither the names of ,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include
+#include "hsa_signals.h"
+
+// One DEFINE_TEST per signal test case. DEFINE_TEST, ADD_TEST,
+// INITIALIZE_TESTSUITE and RUN_TESTS are macros defined outside this file.
+// Every name listed here is registered again, in the same order, in main().
+
+// Signal creation and destruction.
+DEFINE_TEST(signal_create_concurrent);
+DEFINE_TEST(signal_create_initial_value);
+DEFINE_TEST(signal_create_max_consumers);
+DEFINE_TEST(signal_create_one_consumers);
+DEFINE_TEST(signal_create_zero_consumers);
+DEFINE_TEST(signal_destroy_concurrent);
+// Signals used from kernels.
+DEFINE_TEST(signal_kernel_multi_set);
+DEFINE_TEST(signal_kernel_multi_wait);
+DEFINE_TEST(signal_kernel_set);
+DEFINE_TEST(signal_kernel_wait);
+// Waiting on signals.
+DEFINE_TEST(signal_wait_acquire_add);
+DEFINE_TEST(signal_wait_relaxed_add);
+DEFINE_TEST(signal_wait_acquire_and);
+DEFINE_TEST(signal_wait_relaxed_and);
+DEFINE_TEST(signal_wait_acquire_cas);
+DEFINE_TEST(signal_wait_relaxed_cas);
+DEFINE_TEST(signal_wait_conditions);
+DEFINE_TEST(signal_wait_satisfied_conditions);
+DEFINE_TEST(signal_wait_expectancy);
+DEFINE_TEST(signal_wait_acquire_exchange);
+DEFINE_TEST(signal_wait_relaxed_exchange);
+DEFINE_TEST(signal_wait_acquire_or);
+DEFINE_TEST(signal_wait_relaxed_or);
+DEFINE_TEST(signal_wait_store_relaxed);
+DEFINE_TEST(signal_wait_store_release);
+DEFINE_TEST(signal_wait_acquire_subtract);
+DEFINE_TEST(signal_wait_relaxed_subtract);
+DEFINE_TEST(signal_wait_acquire_xor);
+DEFINE_TEST(signal_wait_relaxed_xor);
+DEFINE_TEST(signal_wait_acquire_timeout);
+DEFINE_TEST(signal_wait_relaxed_timeout);
+// Load/store ordering and atomicity.
+DEFINE_TEST(signal_store_release_load_acquire_ordering);
+DEFINE_TEST(signal_store_release_load_acquire_ordering_transitive);
+DEFINE_TEST(signal_load_store_atomic);
+// Read-modify-write operations: ordering and atomicity per operation.
+DEFINE_TEST(signal_add_acq_rel_ordering);
+DEFINE_TEST(signal_add_acq_rel_ordering_transitive);
+DEFINE_TEST(signal_add_acquire_release_ordering);
+DEFINE_TEST(signal_add_acquire_release_ordering_transitive);
+DEFINE_TEST(signal_add_atomic_acq_rel);
+DEFINE_TEST(signal_add_atomic_acquire);
+DEFINE_TEST(signal_add_atomic_release);
+DEFINE_TEST(signal_add_atomic_relaxed);
+DEFINE_TEST(signal_and_acq_rel_ordering);
+DEFINE_TEST(signal_and_acq_rel_ordering_transitive);
+DEFINE_TEST(signal_and_acquire_release_ordering);
+DEFINE_TEST(signal_and_acquire_release_ordering_transitive);
+DEFINE_TEST(signal_and_atomic_acq_rel);
+DEFINE_TEST(signal_and_atomic_acquire);
+DEFINE_TEST(signal_and_atomic_release);
+DEFINE_TEST(signal_and_atomic_relaxed);
+DEFINE_TEST(signal_cas_acq_rel_ordering);
+DEFINE_TEST(signal_cas_acq_rel_ordering_transitive);
+DEFINE_TEST(signal_cas_acquire_release_ordering);
+DEFINE_TEST(signal_cas_acquire_release_ordering_transitive);
+DEFINE_TEST(signal_cas_atomic_acq_rel);
+DEFINE_TEST(signal_cas_atomic_acquire);
+DEFINE_TEST(signal_cas_atomic_release);
+DEFINE_TEST(signal_cas_atomic_relaxed);
+DEFINE_TEST(signal_exchange_acq_rel_ordering);
+DEFINE_TEST(signal_exchange_acq_rel_ordering_transitive);
+DEFINE_TEST(signal_exchange_acquire_release_ordering);
+DEFINE_TEST(signal_exchange_acquire_release_ordering_transitive);
+DEFINE_TEST(signal_exchange_atomic_acq_rel);
+DEFINE_TEST(signal_exchange_atomic_acquire);
+DEFINE_TEST(signal_exchange_atomic_release);
+DEFINE_TEST(signal_exchange_atomic_relaxed);
+DEFINE_TEST(signal_or_acq_rel_ordering);
+DEFINE_TEST(signal_or_acq_rel_ordering_transitive);
+DEFINE_TEST(signal_or_acquire_release_ordering);
+DEFINE_TEST(signal_or_acquire_release_ordering_transitive);
+DEFINE_TEST(signal_or_atomic_acq_rel);
+DEFINE_TEST(signal_or_atomic_acquire);
+DEFINE_TEST(signal_or_atomic_release);
+DEFINE_TEST(signal_or_atomic_relaxed);
+// NOTE(review): unlike add/and/cas/exchange/or/xor, the subtract group has
+// no signal_subtract_acquire_release_ordering entry -- confirm whether that
+// test is intentionally absent.
+DEFINE_TEST(signal_subtract_acq_rel_ordering);
+DEFINE_TEST(signal_subtract_acq_rel_ordering_transitive);
+DEFINE_TEST(signal_subtract_acquire_release_ordering_transitive);
+DEFINE_TEST(signal_subtract_atomic_acq_rel);
+DEFINE_TEST(signal_subtract_atomic_acquire);
+DEFINE_TEST(signal_subtract_atomic_release);
+DEFINE_TEST(signal_subtract_atomic_relaxed);
+DEFINE_TEST(signal_xor_acq_rel_ordering);
+DEFINE_TEST(signal_xor_acq_rel_ordering_transitive);
+DEFINE_TEST(signal_xor_acquire_release_ordering);
+DEFINE_TEST(signal_xor_acquire_release_ordering_transitive);
+DEFINE_TEST(signal_xor_atomic_acq_rel);
+DEFINE_TEST(signal_xor_atomic_acquire);
+DEFINE_TEST(signal_xor_atomic_release);
+DEFINE_TEST(signal_xor_atomic_relaxed);
+
+// Entry point of the signals test executable: initializes the suite,
+// registers every test defined above in declaration order, then runs them.
+// NOTE(review): there is no explicit return statement; presumably RUN_TESTS
+// expands to one -- confirm against the framework header.
+int main(int argc, char* argv[]) {
+    INITIALIZE_TESTSUITE();
+    ADD_TEST(signal_create_concurrent);
+    ADD_TEST(signal_create_initial_value);
+    ADD_TEST(signal_create_max_consumers);
+    ADD_TEST(signal_create_one_consumers);
+    ADD_TEST(signal_create_zero_consumers);
+    ADD_TEST(signal_destroy_concurrent);
+    ADD_TEST(signal_kernel_multi_set);
+    ADD_TEST(signal_kernel_multi_wait);
+    ADD_TEST(signal_kernel_set);
+    ADD_TEST(signal_kernel_wait);
+    ADD_TEST(signal_wait_acquire_add);
+    ADD_TEST(signal_wait_relaxed_add);
+    ADD_TEST(signal_wait_acquire_and);
+    ADD_TEST(signal_wait_relaxed_and);
+    ADD_TEST(signal_wait_acquire_cas);
+    ADD_TEST(signal_wait_relaxed_cas);
+    ADD_TEST(signal_wait_conditions);
+    ADD_TEST(signal_wait_satisfied_conditions);
+    ADD_TEST(signal_wait_expectancy);
+    ADD_TEST(signal_wait_acquire_exchange);
+    ADD_TEST(signal_wait_relaxed_exchange);
+    ADD_TEST(signal_wait_acquire_or);
+    ADD_TEST(signal_wait_relaxed_or);
+    ADD_TEST(signal_wait_store_relaxed);
+    ADD_TEST(signal_wait_store_release);
+    ADD_TEST(signal_wait_acquire_subtract);
+    ADD_TEST(signal_wait_relaxed_subtract);
+    ADD_TEST(signal_wait_acquire_xor);
+    ADD_TEST(signal_wait_relaxed_xor);
+    ADD_TEST(signal_wait_acquire_timeout);
+    ADD_TEST(signal_wait_relaxed_timeout);
+    ADD_TEST(signal_store_release_load_acquire_ordering);
+    ADD_TEST(signal_store_release_load_acquire_ordering_transitive);
+    ADD_TEST(signal_load_store_atomic);
+    ADD_TEST(signal_add_acq_rel_ordering);
+    ADD_TEST(signal_add_acq_rel_ordering_transitive);
+    ADD_TEST(signal_add_acquire_release_ordering);
+    ADD_TEST(signal_add_acquire_release_ordering_transitive);
+    ADD_TEST(signal_add_atomic_acq_rel);
+    ADD_TEST(signal_add_atomic_acquire);
+    ADD_TEST(signal_add_atomic_release);
+    ADD_TEST(signal_add_atomic_relaxed);
+    ADD_TEST(signal_and_acq_rel_ordering);
+    ADD_TEST(signal_and_acq_rel_ordering_transitive);
+    ADD_TEST(signal_and_acquire_release_ordering);
+    ADD_TEST(signal_and_acquire_release_ordering_transitive);
+    ADD_TEST(signal_and_atomic_acq_rel);
+    ADD_TEST(signal_and_atomic_acquire);
+    ADD_TEST(signal_and_atomic_release);
+    ADD_TEST(signal_and_atomic_relaxed);
+    ADD_TEST(signal_cas_acq_rel_ordering);
+    ADD_TEST(signal_cas_acq_rel_ordering_transitive);
+    ADD_TEST(signal_cas_acquire_release_ordering);
+    ADD_TEST(signal_cas_acquire_release_ordering_transitive);
+    ADD_TEST(signal_cas_atomic_acq_rel);
+    ADD_TEST(signal_cas_atomic_acquire);
+    ADD_TEST(signal_cas_atomic_release);
+    ADD_TEST(signal_cas_atomic_relaxed);
+    ADD_TEST(signal_exchange_acq_rel_ordering);
+    ADD_TEST(signal_exchange_acq_rel_ordering_transitive);
+    ADD_TEST(signal_exchange_acquire_release_ordering);
+    ADD_TEST(signal_exchange_acquire_release_ordering_transitive);
+    ADD_TEST(signal_exchange_atomic_acq_rel);
+    ADD_TEST(signal_exchange_atomic_acquire);
+    ADD_TEST(signal_exchange_atomic_release);
+    ADD_TEST(signal_exchange_atomic_relaxed);
+    ADD_TEST(signal_or_acq_rel_ordering);
+    ADD_TEST(signal_or_acq_rel_ordering_transitive);
+    ADD_TEST(signal_or_acquire_release_ordering);
+    ADD_TEST(signal_or_acquire_release_ordering_transitive);
+    ADD_TEST(signal_or_atomic_acq_rel);
+    ADD_TEST(signal_or_atomic_acquire);
+    ADD_TEST(signal_or_atomic_release);
+    ADD_TEST(signal_or_atomic_relaxed);
+    ADD_TEST(signal_subtract_acq_rel_ordering);
+    ADD_TEST(signal_subtract_acq_rel_ordering_transitive);
+    ADD_TEST(signal_subtract_acquire_release_ordering_transitive);
+    ADD_TEST(signal_subtract_atomic_acq_rel);
+    ADD_TEST(signal_subtract_atomic_acquire);
+    ADD_TEST(signal_subtract_atomic_release);
+    ADD_TEST(signal_subtract_atomic_relaxed);
+    ADD_TEST(signal_xor_acq_rel_ordering);
+    ADD_TEST(signal_xor_acq_rel_ordering_transitive);
+    ADD_TEST(signal_xor_acquire_release_ordering);
+    ADD_TEST(signal_xor_acquire_release_ordering_transitive);
+    ADD_TEST(signal_xor_atomic_acq_rel);
+    ADD_TEST(signal_xor_atomic_acquire);
+    ADD_TEST(signal_xor_atomic_release);
+    ADD_TEST(signal_xor_atomic_relaxed);
+    RUN_TESTS();
+}
diff --git a/src/core/signals/hsa_signals.h b/src/core/signals/hsa_signals.h
new file mode 100644
index 0000000..fb28d48
--- /dev/null
+++ b/src/core/signals/hsa_signals.h
@@ -0,0 +1,137 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+#ifndef _HSA_SIGNALS_H_
+#define _HSA_SIGNALS_H_  /* Prototypes of the signal conformance test entry points; each takes no arguments and returns int. */
+extern int test_signal_create_concurrent(void);
+extern int test_signal_create_initial_value(void);
+extern int test_signal_create_max_consumers(void);
+extern int test_signal_create_one_consumers(void);
+extern int test_signal_create_zero_consumers(void);
+extern int test_signal_destroy_concurrent(void);
+extern int test_signal_kernel_multi_set(void);
+extern int test_signal_kernel_multi_wait(void);
+extern int test_signal_kernel_set(void);
+extern int test_signal_kernel_wait(void);
+extern int test_signal_wait_acquire_add(void);
+extern int test_signal_wait_acquire_and(void);
+extern int test_signal_wait_acquire_cas(void);
+extern int test_signal_wait_acquire_exchange(void);
+extern int test_signal_wait_acquire_or(void);
+extern int test_signal_wait_acquire_subtract(void);
+extern int test_signal_wait_acquire_xor(void);
+extern int test_signal_wait_relaxed_add(void);
+extern int test_signal_wait_relaxed_and(void);
+extern int test_signal_wait_relaxed_cas(void);
+extern int test_signal_wait_relaxed_exchange(void);
+extern int test_signal_wait_relaxed_or(void);
+extern int test_signal_wait_relaxed_subtract(void);
+extern int test_signal_wait_relaxed_xor(void);
+extern int test_signal_wait_conditions(void);
+extern int test_signal_wait_expectancy(void);
+extern int test_signal_wait_satisfied_conditions(void);
+extern int test_signal_wait_store_release(void);
+extern int test_signal_wait_store_relaxed(void);
+extern int test_signal_wait_acquire_timeout(void);
+extern int test_signal_wait_relaxed_timeout(void);
+extern int test_signal_store_release_load_acquire_ordering(void);
+extern int test_signal_store_release_load_acquire_ordering_transitive(void);
+extern int test_signal_load_store_atomic(void);
+extern int test_signal_add_acq_rel_ordering(void);
+extern int test_signal_add_acq_rel_ordering_transitive(void);
+extern int test_signal_add_acquire_release_ordering(void);
+extern int test_signal_add_acquire_release_ordering_transitive(void);
+extern int test_signal_add_atomic_acq_rel(void);
+extern int test_signal_add_atomic_acquire(void);
+extern int test_signal_add_atomic_release(void);
+extern int test_signal_add_atomic_relaxed(void);
+extern int test_signal_and_acq_rel_ordering(void);
+extern int test_signal_and_acq_rel_ordering_transitive(void);
+extern int test_signal_and_acquire_release_ordering(void);
+extern int test_signal_and_acquire_release_ordering_transitive(void);
+extern int test_signal_and_atomic_acq_rel(void);
+extern int test_signal_and_atomic_acquire(void);
+extern int test_signal_and_atomic_release(void);
+extern int test_signal_and_atomic_relaxed(void);
+extern int test_signal_cas_acq_rel_ordering(void);
+extern int test_signal_cas_acq_rel_ordering_transitive(void);
+extern int test_signal_cas_acquire_release_ordering(void);
+extern int test_signal_cas_acquire_release_ordering_transitive(void);
+extern int test_signal_cas_atomic_acq_rel(void);
+extern int test_signal_cas_atomic_acquire(void);
+extern int test_signal_cas_atomic_release(void);
+extern int test_signal_cas_atomic_relaxed(void);
+extern int test_signal_exchange_acq_rel_ordering(void);
+extern int test_signal_exchange_acq_rel_ordering_transitive(void);
+extern int test_signal_exchange_acquire_release_ordering(void);
+extern int test_signal_exchange_acquire_release_ordering_transitive(void);
+extern int test_signal_exchange_atomic_acq_rel(void);
+extern int test_signal_exchange_atomic_acquire(void);
+extern int test_signal_exchange_atomic_release(void);
+extern int test_signal_exchange_atomic_relaxed(void);
+extern int test_signal_or_acq_rel_ordering(void);
+extern int test_signal_or_acq_rel_ordering_transitive(void);
+extern int test_signal_or_acquire_release_ordering(void);
+extern int test_signal_or_acquire_release_ordering_transitive(void);
+extern int test_signal_or_atomic_acq_rel(void);
+extern int test_signal_or_atomic_acquire(void);
+extern int test_signal_or_atomic_release(void);
+extern int test_signal_or_atomic_relaxed(void);
+extern int test_signal_subtract_acq_rel_ordering(void);
+extern int test_signal_subtract_acq_rel_ordering_transitive(void);
+extern int test_signal_subtract_acquire_release_ordering_transitive(void);
+extern int test_signal_subtract_atomic_acq_rel(void);
+extern int test_signal_subtract_atomic_acquire(void);
+extern int test_signal_subtract_atomic_release(void);
+extern int test_signal_subtract_atomic_relaxed(void);
+extern int test_signal_xor_acq_rel_ordering(void);
+extern int test_signal_xor_acq_rel_ordering_transitive(void);
+extern int test_signal_xor_acquire_release_ordering(void);
+extern int test_signal_xor_acquire_release_ordering_transitive(void);
+extern int test_signal_xor_atomic_acq_rel(void);
+extern int test_signal_xor_atomic_acquire(void);
+extern int test_signal_xor_atomic_release(void);
+extern int test_signal_xor_atomic_relaxed(void);
+#endif // _HSA_SIGNALS_H_
diff --git a/src/core/signals/test_signal_add_acq_rel_ordering.c b/src/core/signals/test_signal_add_acq_rel_ordering.c
new file mode 100644
index 0000000..bc3ccd3
--- /dev/null
+++ b/src/core/signals/test_signal_add_acq_rel_ordering.c
@@ -0,0 +1,186 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: signal_add_acq_rel_ordering
+ * Scope: Conformance
+ *
+ * Purpose: Verifies that the hsa_signal_add_acq_rel API enforces
+ * correct memory ordering.
+ *
+ * Test Description:
+ * 1) Create several signals and store the handles in an array,
+ *    denoted by x[]. All the signal values should be initialized to 2.
+ * 2) Create a control signal, denoted by y, initialized to 1.
+ * 3) Start one thread that
+ *    a) Checks the value of y in a loop using hsa_signal_cas_acq_rel with
+ *       1 as the expected value and 0 as the exchange value.
+ *    b) When the value of y is 1 the exchange succeeds and the thread stops looping.
+ *    c) Checks all of the x signal values with the signal_load_relaxed
+ *       API, expecting a value of 2, and replacing it with a value of 1 by
+ *       calling signal_add_relaxed to add -1 to the value.
+ *    d) Sets the value of y to 2, using the signal_add_acq_rel API
+ *       to add a value of 2.
+ *    e) Starts over.
+ * 4) Start a second thread that does exactly the same set of operations,
+ *    but uses 1 in place of 2, 2 in place of 1 and 1 in place of -1.
+ * 5) Let both threads run for millions of iterations.
+ *
+ * Expected Results: For each cycle, the reported x values for the first thread should be 2
+ * and the reported x values for the second thread should be 1, i.e. all memory operations
+ * that occurred in thread one should be appropriately ordered in thread two after
+ * the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+// Control signal y and data signals x[]; file-local so they cannot clash with the same names defined by sibling tests.
+static hsa_signal_t y;
+static hsa_signal_t x[NUM_X];
+
+// Thread one: claims y when it is 1 (CAS 1 -> 0), checks that every x is 2 and adds -1 to each,
+// then hands over to thread two by adding 2 to y (0 -> 2) with acq_rel ordering. `data` is unused.
+void test_signal_add_acq_rel_t1(void *data) {
+    int ii, jj;
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) {
+        // spin until y is 1, atomically replacing it with 0
+        while (hsa_signal_cas_acq_rel(y, 1, 0) != 1);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should be 2; hsa_signal_value_t can be 64-bit, so it is printed as long long
+            hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT_MSG(sig_val_t1 == 2, "signal value should be 2, but is %lld\n", (long long)sig_val_t1);
+
+            // change x value to 1
+            hsa_signal_add_relaxed(x[ii], -1);
+        }
+
+        // change y from 0 to 2, releasing the x updates to thread two
+        hsa_signal_add_acq_rel(y, 2);
+    }
+    return;
+}
+
+
+// Thread two: mirror image of thread one -- claims y when it is 2 (CAS 2 -> 0), checks that every x is 1
+// and adds 1 to each, then hands back to thread one by adding 1 to y (0 -> 1). `data` is unused.
+void test_signal_add_acq_rel_t2(void *data) {
+    int ii, jj;
+
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) {
+        // spin until y is 2, atomically replacing it with 0
+        while (hsa_signal_cas_acq_rel(y, 2, 0) != 2);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should be 1; hsa_signal_value_t can be 64-bit, so it is printed as long long
+            hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT_MSG(sig_val_t2 == 1, "signal value should be 1, but is %lld\n", (long long)sig_val_t2);
+
+            // change x value to 2
+            hsa_signal_add_relaxed(x[ii], 1);
+        }
+
+        // change y from 0 to 1, releasing the x updates to thread one
+        hsa_signal_add_acq_rel(y, 1);
+    }
+    return;
+}
+// Test entry point: creates the signals, runs the two thread functions through a test_group, then destroys the signals. Returns 0.
+int test_signal_add_acq_rel_ordering() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < NUM_X; ++ii) {
+        // initialize all x values to 2
+        status = hsa_signal_create(2, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // initialize y to 1 so that thread one goes first
+    status = hsa_signal_create(1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // create test_group
+    struct test_group *test = test_group_create(2);
+
+    // add test func one to the test group
+    test_group_add(test, test_signal_add_acq_rel_t1, NULL, 1);
+
+    // add test func two to the test_group
+    test_group_add(test, test_signal_add_acq_rel_t2, NULL, 1);
+
+    // create threads for each test
+    test_group_thread_create(test);
+
+    // start test functions
+    test_group_start(test);
+
+    // wait until all test functions finish
+    test_group_wait(test);
+
+    // exit all tests
+    test_group_exit(test);
+
+    // cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < NUM_X; ++ii) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_add_acq_rel_ordering_transitive.c b/src/core/signals/test_signal_add_acq_rel_ordering_transitive.c
new file mode 100644
index 0000000..5da3608
--- /dev/null
+++ b/src/core/signals/test_signal_add_acq_rel_ordering_transitive.c
@@ -0,0 +1,237 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_add_acq_rel_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_add_acq_rel API + * enforces transitive memory ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. 
All the signal values should be initialized
+ *    to 2.
+ * 2) Create two control signals, denoted by y and z. The initial
+ *    value of both the y signal and the z signal should be 1.
+ * 3) Start one thread that
+ *    a) Uses signal_load_relaxed to load the value of y in a loop, stopping
+ *       when the value is 1.
+ *    b) Uses signal_add_acq_rel to decrement the value of y to 0 by adding
+ *       -1.
+ *    c) Checks all of the x signal values with the signal_load_relaxed
+ *       API, expecting a value of 2.
+ *    d) Changes all of the x signal values to 1 using the signal_add_relaxed
+ *       API by adding -1 to the value.
+ *    e) Sets the value of y to 2, using the signal_add_acq_rel API to add
+ *       2 to the signal value.
+ *    f) Starts over.
+ *    g) If it detects that the value of y is -1, it terminates.
+ * 4) Start a second thread that does exactly the same set of operations,
+ *    but uses 1 in place of 2 and 2 in place of 1, and operates on signal z
+ *    instead of signal y.
+ * 5) Start a third thread that
+ *    a) Waits until y is 2 using signal_wait_acquire.
+ *    b) Sets the value of z to 2 using signal_cas_release with
+ *       1 as the condition.
+ *    c) Waits until z is 1 using signal_wait_acquire.
+ *    d) Sets the value of y to 1 using signal_cas_release with
+ *       2 as the condition.
+ *    e) Starts over.
+ *    f) After a set number of iterations the third thread should set
+ *       both y and z signal values to -1 and terminate.
+ * 6) Let all three threads run for thousands of iterations.
+ *
+ * Expected Results: For each cycle all memory operations that occurred in thread one should
+ * be appropriately ordered in thread two after the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+// Control signals y (thread one) and z (thread two) plus data signals x[]; file-local so they cannot clash with sibling tests.
+static hsa_signal_t y;
+static hsa_signal_t z;
+static hsa_signal_t x[NUM_X];
+// Thread one: each round waits for y == 1, checks that every x is 2, adds -1 to each, then sets y to 2. Returns when y becomes -1. `data` is unused.
+void test_signal_add_acq_rel_ordering_t1(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t y_val = 0;
+        // loop until y = 1, or terminate when y = -1
+        while ((y_val = hsa_signal_load_relaxed(y)) != 1)
+            if (y_val == -1) return;
+
+        hsa_signal_add_acq_rel(y, -1);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should be 2; hsa_signal_value_t can be 64-bit, so it is printed as long long
+            hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT_MSG(sig_val_t1 == 2, "signal value should be 2, but is %lld\n", (long long)sig_val_t1);
+
+            // change x value to 1
+            hsa_signal_add_relaxed(x[ii], -1);
+        }
+
+        // set y to 2 (0 + 2), which wakes thread three
+        hsa_signal_add_acq_rel(y, 2);
+    }
+    return;
+}
+
+// Thread two: each round waits for z == 2, checks that every x is 1, adds 1 to each, then sets z to 1. Returns when z becomes -1. `data` is unused.
+void test_signal_add_acq_rel_ordering_t2(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t z_val = 0;
+        // loop until z = 2, or terminate when z = -1
+        while ((z_val = hsa_signal_load_relaxed(z)) != 2)
+            if (z_val == -1) return;
+
+        hsa_signal_add_acq_rel(z, -2);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should be 1; hsa_signal_value_t can be 64-bit, so it is printed as long long
+            hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT_MSG(sig_val_t2 == 1, "signal value should be 1, but is %lld\n", (long long)sig_val_t2);
+
+            // change x value to 2
+            hsa_signal_add_relaxed(x[ii], 1);
+        }
+
+        // set z to 1 (0 + 1), which wakes thread three
+        hsa_signal_add_acq_rel(z, 1);
+    }
+    return;
+}
+
+// Thread three: relays control between thread one (via y) and thread two (via z) for NUM_ITER_MEM_ORD rounds, then stores -1 to both to stop them. `data` is unused.
+void test_signal_add_acq_rel_ordering_t3(void *data) {
+    int ii;
+    for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) {
+        // wait until y = 2 (thread one finished its round); re-wait because the wait may return before the condition holds
+        while (hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, 2, UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 2);
+
+        // hand over to thread two: change z from 1 to 2
+        hsa_signal_cas_release(z, 1, 2);
+
+        // wait until z = 1 (thread two finished its round); re-wait because the wait may return before the condition holds
+        while (hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 1);
+
+        // hand back to thread one: change y from 2 to 1
+        hsa_signal_cas_release(y, 2, 1);
+    }
+
+    // thread one runs one more round after the last hand-back; wait for it to finish (y == 2) so its pending add cannot clobber the -1 sentinel
+    while (hsa_signal_load_relaxed(y) != 2);
+    hsa_signal_store_release(y, -1);
+    hsa_signal_store_release(z, -1);
+
+    return;
+}
+// Test entry point: creates the signals, runs the three thread functions through a test_group, then destroys the signals. Returns 0.
+int test_signal_add_acq_rel_ordering_transitive() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < NUM_X; ++ii) {
+        // initialize all x values to 2
+        status = hsa_signal_create(2, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // initialize y to 1 so that thread one goes first
+    status = hsa_signal_create(1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // initialize z to 1, the value thread three's first cas on z expects
+    status = hsa_signal_create(1, 0, NULL, &z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // create test_group
+    struct test_group *test = test_group_create(3);
+
+    // add test func one to the test group
+    test_group_add(test, test_signal_add_acq_rel_ordering_t1, NULL, 1);
+
+    // add test func two to the test_group
+    test_group_add(test, test_signal_add_acq_rel_ordering_t2, NULL, 1);
+
+    // add test func three to the test_group
+    test_group_add(test, test_signal_add_acq_rel_ordering_t3, NULL, 1);
+
+    // create threads for each test
+    test_group_thread_create(test);
+
+    // start test functions
+    test_group_start(test);
+
+    // wait until all test functions finish
+    test_group_wait(test);
+
+    // exit all tests
+    test_group_exit(test);
+
+    // cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < NUM_X; ++ii) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_signal_destroy(z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_add_acquire_release_ordering.c b/src/core/signals/test_signal_add_acquire_release_ordering.c
new file mode 100644
index 0000000..3bca4eb
--- /dev/null
+++ b/src/core/signals/test_signal_add_acquire_release_ordering.c
@@ -0,0 +1,178 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release
License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+/*
+ * Test Name: signal_add_acquire_release_ordering
+ * Scope: Conformance
+ *
+ * Purpose: Verifies that the hsa_signal_add_release and the
+ * hsa_signal_add_acquire APIs enforce correct memory ordering.
+ *
+ * Test Description:
+ * 1) Create several signals and store the handles in an array,
+ *    denoted by x[]. All the signal values should be initialized to 2.
+ * 2) Create a control signal, denoted by y, initialized to 1.
+ * 3) Start one thread that
+ *    a) Checks the value of y in a loop using hsa_signal_cas_relaxed with
+ *       1 as the expected value and -1 as the exchange value.
+ *    b) When the value of y is 1 the exchange succeeds and the thread stops looping.
+ *    c) The value of y is incremented to 0 using hsa_signal_add_acquire.
+ *    d) Checks all of the x signal values with the signal_load_relaxed
+ *       API, expecting a value of 2, and replacing it with a value of 1 by
+ *       calling signal_add_relaxed to add -1 to the value.
+ *    e) Sets the value of y to 2, using the signal_add_release API
+ *       to add a value of 2.
+ *    f) Starts over.
+ * 4) Start a second thread that does exactly the same set of operations,
+ *    but uses 1 in place of 2, 2 in place of 1 and 1 in place of -1.
+ * 5) Let both threads run for millions of iterations.
+ *
+ * Expected Results: For each cycle, the reported x values for the first thread should be 2
+ * and the reported x values for the second thread should be 1, i.e. all memory operations
+ * that occurred in thread one should be appropriately ordered in thread two after
+ * the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+// Control signal y and data signals x[]; file-local so they cannot clash with the same names defined by sibling tests.
+static hsa_signal_t y;
+static hsa_signal_t x[NUM_X];
+
+// Thread one: claims y when it is 1 (relaxed CAS 1 -> -1), acquires with add_acquire (-1 -> 0), checks that every x is 2 and adds -1 to each, then releases by adding 2 to y. `data` is unused.
+void test_signal_add_acquire_release_t1(void *data) {
+    int ii, jj;
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) {
+        while (hsa_signal_cas_relaxed(y, 1, -1) != 1);  // relaxed claim: the ordering must come from the add_acquire below, which is the API under test
+        hsa_signal_add_acquire(y, 1);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT(sig_val_t1 == 2);
+
+            hsa_signal_add_relaxed(x[ii], -1);
+        }
+
+        hsa_signal_add_release(y, 2);
+    }
+    return;
+}
+
+
+// Thread two: claims y when it is 2 (relaxed CAS 2 -> -1), acquires with add_acquire (-1 -> 0), checks that every x is 1 and adds 1 to each, then releases by adding 1 to y. `data` is unused.
+void test_signal_add_acquire_release_t2(void *data) {
+    int ii, jj;
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) {
+        while (hsa_signal_cas_relaxed(y, 2, -1) != 2);  // relaxed claim: the ordering must come from the add_acquire below, which is the API under test
+        hsa_signal_add_acquire(y, 1);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT(sig_val_t2 == 1);
+
+            hsa_signal_add_relaxed(x[ii], 1);
+        }
+
+        hsa_signal_add_release(y, 1);
+    }
+    return;
+}
+// Test entry point: creates the signals, runs the two thread functions through a test_group, then destroys the signals. Returns 0.
+int test_signal_add_acquire_release_ordering() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < NUM_X; ++ii) {
+        // initialize all x values to 2
+        status = hsa_signal_create(2, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // initialize y to 1 so that thread one goes first
+    status = hsa_signal_create(1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // create test_group
+    struct test_group *test = test_group_create(2);
+
+    // add test func one to the test group
+    test_group_add(test, test_signal_add_acquire_release_t1, NULL, 1);
+
+    // add test func two to the test_group
+    test_group_add(test, test_signal_add_acquire_release_t2, NULL, 1);
+
+    // create threads for each test
+    test_group_thread_create(test);
+
+    // start test functions
+    test_group_start(test);
+
+    // wait until all test functions finish
+    test_group_wait(test);
+
+    // exit all tests
+    test_group_exit(test);
+
+    // cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < NUM_X; ++ii) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_add_acquire_release_ordering_transitive.c b/src/core/signals/test_signal_add_acquire_release_ordering_transitive.c
new file mode 100644
index 0000000..c2b7cd8
--- /dev/null
+++ b/src/core/signals/test_signal_add_acquire_release_ordering_transitive.c
@@ -0,0 +1,237 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: signal_add_acquire_release_ordering_transitive
+ * Scope: Conformance
+ *
+ * Purpose: Verifies that the hsa_signal_add_acquire and
+ * hsa_signal_add_release APIs enforce transitive memory
+ * ordering.
+ *
+ * Test Description:
+ * 1) Create several signals and store the handles in an array,
+ *    denoted by x[]. All the signal values should be initialized
+ *    to 2.
+ * 2) Create two control signals, denoted by y and z. The initial
+ *    value of both the y signal and the z signal should be 1.
+ * 3) Start one thread that
+ *    a) Uses signal_load_relaxed to load the value of y in a loop, stopping
+ *       when the value is 1.
+ *    b) Uses signal_add_acquire to decrement the value of y to 0 by adding
+ *       -1.
+ *    c) Checks all of the x signal values with the signal_load_relaxed
+ *       API, expecting a value of 2.
+ *    d) Changes all of the x signal values to 1 using the signal_add_relaxed
+ *       API by adding -1 to the value.
+ *    e) Sets the value of y to 2, using the signal_add_release API to add
+ *       2 to the signal value.
+ *    f) Starts over.
+ * g) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 1 in place of 2 and 2 in place of 1, and operates on signal z + * instead of signal y. + * 5) Start a third thread that + * a) Waits until y is 2 using signal_wait_acquire. + * c) Sets the value of z to 2 using signal_cas_release with + * 1 as the condition. + * e) Waits until z is 1 using signal_wait_acquire. + * f) Set the value of y to 1 using signal_cas_release with + * 2 as the condition. + * g) Starts over. + * h) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 6) Let all three threads run for thousands of iterations. + * + * Expected Results: For each cycle all memory operations that occurred in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa. + */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; +// Thread one: when y == 1, takes y to 0, checks every x == 2, lowers every x to 1, then publishes y = 2. +void test_signal_add_acquire_release_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // spin until y = 1; terminate if y = -1 + while ((y_val = hsa_signal_load_relaxed(y)) != 1) + if (y_val == -1) return; + + hsa_signal_add_acquire(y, -1); + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should be equal to 2 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == 2, "signal value should be 2, but is %lld\n", (long long)sig_val_t1); + + // change x value to 1 + hsa_signal_add_relaxed(x[ii], -1); + } + + // set y to 2 (0 + 2) with release semantics so the x updates are published + hsa_signal_add_release(y, 2); + } + return; +} +// Thread two: mirror of thread one on signal z (triggers on z == 2, expects x == 1, raises x to 2, publishes z = 1). +void test_signal_add_acquire_release_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // spin until z = 2; terminate if z = -1 + while ((z_val = hsa_signal_load_relaxed(z)) != 2) + if (z_val == -1) return; + + hsa_signal_add_acquire(z, -2); + + for (ii = 0; ii < NUM_X; ++ii) { + //
every x value should be equal to 1 + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == 1, "signal value should be 1, but is %lld\n", (long long)sig_val_t2); + + // change x value to 2 + hsa_signal_add_relaxed(x[ii], 1); + } + + // set z to 1 (0 + 1) with release semantics so the x updates are published + hsa_signal_add_release(z, 1); + } + return; +} + +// Thread three: relays each hand-over from y to z and back for NUM_ITER_MEM_ORD rounds, then requests termination. +void test_signal_add_acquire_release_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // wait until y = 2 + hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, 2, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set z to 2 (expected current value is 1) + hsa_signal_cas_release(z, 1, 2); + + // wait until z = 1 + hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set y to 1 (expected current value is 2) + hsa_signal_cas_release(y, 2, 1); + } + + // wait until t1 has set y to 2 before storing the -1 terminate value, to avoid deadlock + while (hsa_signal_load_relaxed(y) != 2); + hsa_signal_store_release(y, -1); + hsa_signal_store_release(z, -1); + + return; +} +// Test entry point: creates the signals, runs the three thread functions in a test group, then destroys everything. Returns 0. +int test_signal_add_acquire_release_ordering_transitive() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize all x values to 2 + status = hsa_signal_create(2, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y to 1 (the value thread one triggers on) + status = hsa_signal_create(1, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // initialize z to 1 (thread three switches it to 2 with a CAS that expects 1) + status = hsa_signal_create(1, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(3); + + // add test func one to the test group + test_group_add(test, test_signal_add_acquire_release_ordering_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_add_acquire_release_ordering_t2, NULL, 1); + + // add test func three to the test_group + test_group_add(test, test_signal_add_acquire_release_ordering_t3, NULL, 1); + + // create threads for each test +
test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait until all test functions finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_signal_destroy(z); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_add_atomic.c b/src/core/signals/test_signal_add_atomic.c new file mode 100644 index 0000000..79bac7a --- /dev/null +++ b/src/core/signals/test_signal_add_atomic.c @@ -0,0 +1,360 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_add_atomic + * + * Purpose: + * Verify atomicity of the add signal operation + * + * Description: + * + * 1) Create a signal, assigning it an initial value of 0. + * Create several threads, which call hsa_signal_add_acquire + * in a loop to add 1 to signal repeatedly. + * After threads finish, check if the value is correct, and + * repeat this process several times. + * + * 2) Create a signal, assigning it an initial value of 0. + * Create several threads, which call hsa_signal_add_release + * in a loop to add 1 to signal repeatedly. + * After threads finish, check if the value is correct, and + * repeat this process several times. + * + * 3) Create a signal, assigning it an initial value of 0. + * Create several threads, which call hsa_signal_add_relaxed + * in a loop to add 1 to signal repeatedly. + * After threads finish, check if the value is correct, and + * repeat this process several times. + * + * 4) Create a signal, assigning it an initial value of 0. 
+ * Create several threads, which call hsa_signal_add_acq_rel + * in a loop to add 1 to signal repeatedly. + * After threads finish, check if the value is correct, and + * repeat this process several times. + * + */ + +#include +#include +#include +#include "config.h" + +static void child_func_acquire(void* data) { +// Worker: adds 1 to the shared signal OP_COUNT times with hsa_signal_add_acquire. The calling test initializes the runtime before creating threads and shuts it down afterwards, + // so the worker itself does not open the runtime. data points to the shared hsa_signal_t. + hsa_signal_t* signal_handle = (hsa_signal_t*)data; + + // Call hsa_signal_add_acquire in a loop + int ii; + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_add_acquire(*signal_handle, 1); + } + + return; +} +// Worker: adds 1 to the shared signal OP_COUNT times with hsa_signal_add_release. +static void child_func_release(void* data) { + // The calling test initializes the runtime before creating threads and shuts it down afterwards, + // so the worker itself does not open the runtime. data points to the shared hsa_signal_t. + hsa_signal_t* signal_handle = (hsa_signal_t*)data; + + // Call hsa_signal_add_release in a loop + int ii; + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_add_release(*signal_handle, 1); + } + + return; +} +// Worker: adds 1 to the shared signal OP_COUNT times with hsa_signal_add_relaxed. +static void child_func_relaxed(void* data) { + // The calling test initializes the runtime before creating threads and shuts it down afterwards, + // so the worker itself does not open the runtime. data points to the shared hsa_signal_t. + hsa_signal_t* signal_handle = (hsa_signal_t*)data; + + // Call hsa_signal_add_relaxed in a loop + int ii; + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_add_relaxed(*signal_handle, 1); + } + + return; +} +// Worker: adds 1 to the shared signal OP_COUNT times with hsa_signal_add_acq_rel. +static void child_func_acq_rel(void* data) { + // The calling test initializes the runtime before creating threads and shuts it down afterwards, + // so the worker itself does not open the runtime. data points to the shared hsa_signal_t. + hsa_signal_t* signal_handle = (hsa_signal_t*)data; + + // Call hsa_signal_add_acq_rel in a loop + int ii; + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_add_acq_rel(*signal_handle, 1); + } + + return; +} + +/** + * + * @Brief: + *
Implement Description #1 + * + * @Return: + * int + * + */ + +int test_signal_add_atomic_acquire() { + hsa_status_t status; + + // Initialize the HSA runtime + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 0; + + // Create a new signal with an initial value of 0 + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of GROUP_SIZE + struct test_group* group_ptr = NULL; + group_ptr = test_group_create(GROUP_SIZE); + ASSERT(NULL != group_ptr); + // Add the hsa_signal_add_acquire worker, passing the shared signal as its argument + test_group_add(group_ptr, child_func_acquire, &signal_handle, GROUP_SIZE); + // Create threads for tests + test_group_thread_create(group_ptr); + // Run the thread group TEST_COUNT times, checking and resetting the signal after each run + int ii; + for (ii = 0; ii < TEST_COUNT; ++ii) { + test_group_start(group_ptr); + test_group_wait(group_ptr); + // The load may be acquire or relaxed, but the store must be a release store to make sure the signal is reset to 0 + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG((GROUP_SIZE * OP_COUNT) == loaded_value, "Signal value is not the expected value.\n"); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the signal + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implements Description #2 (hsa_signal_add_release) + * + * @Return: + * int, 0 on completion + * + */ + +int test_signal_add_atomic_release() { + hsa_status_t status; + + // Initialize the HSA runtime + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 0; + + // Create a new signal with an initial value of 0 + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group
with size of GROUP_SIZE + struct test_group* group_ptr = NULL; + group_ptr = test_group_create(GROUP_SIZE); + ASSERT(NULL != group_ptr); + // Add the hsa_signal_add_release worker, passing the shared signal as its argument + test_group_add(group_ptr, child_func_release, &signal_handle, GROUP_SIZE); + // Create threads for tests + test_group_thread_create(group_ptr); + // Run the thread group TEST_COUNT times, checking and resetting the signal after each run + int ii; + for (ii = 0; ii < TEST_COUNT; ++ii) { + test_group_start(group_ptr); + test_group_wait(group_ptr); + // The load may be acquire or relaxed, but the store must be a release store to make sure the signal is reset to 0 + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG((GROUP_SIZE * OP_COUNT) == loaded_value, "Signal value is not the expected value.\n"); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the signal + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implements Description #3 (hsa_signal_add_relaxed) + * + * @Return: + * int, 0 on completion + * + */ + +int test_signal_add_atomic_relaxed() { + hsa_status_t status; + + // Initialize the HSA runtime + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 0; + + // Create a new signal with an initial value of 0 + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of GROUP_SIZE + struct test_group* group_ptr = NULL; + group_ptr = test_group_create(GROUP_SIZE); + ASSERT(NULL != group_ptr); + // Add the hsa_signal_add_relaxed worker, passing the shared signal as its argument + test_group_add(group_ptr, child_func_relaxed, &signal_handle, GROUP_SIZE); + // Create threads for tests + test_group_thread_create(group_ptr); + // Run the thread group TEST_COUNT times, checking and resetting the signal after each run + int ii; + for (ii = 0; ii < TEST_COUNT; ++ii) { + test_group_start(group_ptr); +
test_group_wait(group_ptr); + // The load may be acquire or relaxed, but the store must be a release store to make sure the signal is reset to 0 + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG((GROUP_SIZE * OP_COUNT) == loaded_value, "Signal value is not the expected value.\n"); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the signal + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implements Description #4 (hsa_signal_add_acq_rel) + * + * @Return: + * int, 0 on completion + * + */ + +int test_signal_add_atomic_acq_rel() { + hsa_status_t status; + + // Initialize the HSA runtime + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 0; + + // Create a new signal with an initial value of 0 + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of GROUP_SIZE + struct test_group* group_ptr = NULL; + group_ptr = test_group_create(GROUP_SIZE); + ASSERT(NULL != group_ptr); + // Add the hsa_signal_add_acq_rel worker, passing the shared signal as its argument + test_group_add(group_ptr, child_func_acq_rel, &signal_handle, GROUP_SIZE); + // Create threads for tests + test_group_thread_create(group_ptr); + // Run the thread group TEST_COUNT times, checking and resetting the signal after each run + int ii; + for (ii = 0; ii < TEST_COUNT; ++ii) { + test_group_start(group_ptr); + test_group_wait(group_ptr); + // The load may be acquire or relaxed, but the store must be a release store to make sure the signal is reset to 0 + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG((GROUP_SIZE * OP_COUNT) == loaded_value, "Signal value is not the expected value.\n"); + } + // Exit current test group + test_group_exit(group_ptr); +
test_group_destroy(group_ptr); + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_add_release_ordering.c b/src/core/signals/test_signal_add_release_ordering.c new file mode 100644 index 0000000..7f7b77f --- /dev/null +++ b/src/core/signals/test_signal_add_release_ordering.c @@ -0,0 +1,182 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_add_release_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_add_release API enforces + * correct memory ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. All the signal values should be initialized to 2. + * 2) Create a control signal, denoted by y, initialized to 1. + * 3) Start one thread that + * a) Checks the value of y in a loop using hsa_signal_cas_acquire using + * 0 as the exchange value. + * b) When the value of y is 1 it sets the value, the thread stops looping, and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 2, and replacing it with a value of 1 by + * calling signal_add_relaxed to add -1 to the value. + * e) Sets the value of y to 2, using the signal_add_release API + * and an add value of 2. + * f) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 1 in place of 2, 2 in place of 1 and 1 in place of -1. + * 5) Let both threads run for millions of iterations. + * + * Expected Results: For each cycle, the reported x values for the first thread should be 2 + * and the reported x values for the second thread should be 1, i.e. all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa.
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t x[1024]; + +// test func one: +// check y, if y equal to 1, set y to 0, and then check if all x values equal to 2 and set them to 1, and then set y value to 2 +void test_signal_add_release_t1(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + while (hsa_signal_cas_acquire(y, 1, 0) != 1); + // the CAS succeeded: y is now 0 and the x array can be checked + + for (ii = 0; ii < 1024; ++ii) { + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT(sig_val_t1 == 2); + + hsa_signal_add_relaxed(x[ii], -1); + } + // publish the x updates: y goes from 0 to 2 with release semantics + hsa_signal_add_release(y, 2); + } + return; +} + + +// test func two: +// check y, if y equal to 2, set y to 0, and then check if all x values equal to 1 and set them to 2, and then set y value to 1 +void test_signal_add_release_t2(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + while (hsa_signal_cas_acquire(y, 2, 0) != 2); + // the CAS succeeded: y is now 0 and the x array can be checked + + for (ii = 0; ii < 1024; ++ii) { + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT(sig_val_t2 == 1); + + hsa_signal_add_relaxed(x[ii], 1); + } + // publish the x updates: y goes from 0 to 1 with release semantics + hsa_signal_add_release(y, 1); + } + return; +} + +// Test entry point: creates x[] and y, runs the two thread functions in a test group, then destroys everything. Returns 0. +// x must start at 2 because y starts at 1, so test func one runs first and asserts x == 2. +int test_signal_add_release_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < 1024; ++ii) { + // initialize all x values to 2 + status = hsa_signal_create(2, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + + // initialize y to 1 + status = hsa_signal_create(1, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(2); + + // add test func one to the test group + test_group_add(test, test_signal_add_release_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_add_release_t2, NULL, 1); + + // create threads for each test +
test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait until all test functions finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < 1024; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_and_acq_rel_ordering.c b/src/core/signals/test_signal_and_acq_rel_ordering.c new file mode 100644 index 0000000..79ac02f --- /dev/null +++ b/src/core/signals/test_signal_and_acq_rel_ordering.c @@ -0,0 +1,178 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** +* +* Test Name: signal_and_acq_rel_ordering +* +* Purpose: +* Verify that sequential memory ordering of the hsa_signal_and_acq_rel +* API is correct. +* +* Description: +* +* 1) Create several signals and store the handles in an array, +* denoted by x[]. All the signal values should be initialized to have +* the last bit set. +* Create a control signal, denoted by y, also initialized to have only +* the last bit set. +* Start one thread that +* a) Checks the value of y in a loop using hsa_signal_cas_acq_rel using +* 0 as the exchange value. +* b) When the value of y has only the last bit set, the thread stops looping, and +* c) Checks all of the x signal values with the signal_load_relaxed +* API, expecting all x values to have their last bit set. +* d) Replaces the x values by using signal_store_relaxed to set all bits, +* and then uses signal_and_relaxed to mask out all bits but the first. +* e) Replaces the value of y by using signal_store_relaxed to set all bits, +* and then uses signal_and_acq_rel to mask out all bits but the first. +* f) Starts over.
+* Start a second thread that does exactly the same set of operations, +* but sets the last bit for all values, not the first, and triggers when +* the first bit of y is set, not the last. +* Let both threads run for thousands of iterations. +* +*/ + +#include +#include +#include +#include "config.h" +// Thread argument: the shared x signal array and the control signal y. +typedef struct { + hsa_signal_t* signal_x; + hsa_signal_t signal_y; +} param; +// T1: when y == 1, takes y to 0, checks every x == 1, rewrites every x to FIRST_BIT, then publishes y = FIRST_BIT with hsa_signal_and_acq_rel. +static void* T1(void* arg) { + param* param_ptr = (param*)arg; + hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = param_ptr->signal_y; + // The final AND on y must be the acq_rel variant (as in T2) so the relaxed x updates are released to the other thread. + hsa_signal_value_t value; + int j; + for (j = 0; j < NUM_ITERATION; ++j) { + while (1 != hsa_signal_cas_acq_rel(signal_y, 1, 0)); + int i; + for (i = 0; i < NUM_SIGNAL; ++i) { + value = hsa_signal_load_relaxed(signal_x[i]); + ASSERT_MSG(1 == value, "The value of signal_x[%d] is not equal to 1!\n", i); + // Set all bits of each x and mask out all bits but first + hsa_signal_store_relaxed(signal_x[i], -1); + hsa_signal_and_relaxed(signal_x[i], FIRST_BIT); + } + hsa_signal_store_relaxed(signal_y, -1); + hsa_signal_and_acq_rel(signal_y, FIRST_BIT); + } + return NULL; +} +// T2: mirror of T1; triggers on y == FIRST_BIT, expects every x == FIRST_BIT, rewrites every x to 1, then publishes y = 1. +static void* T2(void* arg) { + param* param_ptr = (param*)arg; + hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = param_ptr->signal_y; + + hsa_signal_value_t value; + int j; + for (j = 0; j < NUM_ITERATION; ++j) { + while (FIRST_BIT != hsa_signal_cas_acq_rel(signal_y, FIRST_BIT, 0)); + + int i; + for (i = 0; i < NUM_SIGNAL; ++i) { + value = hsa_signal_load_relaxed(signal_x[i]); + ASSERT_MSG(FIRST_BIT == value, "The value of x[%d] is not equal to the min negative value!\n", i); + // Set all bits of each x and mask out all bits but last + hsa_signal_store_relaxed(signal_x[i], -1); + hsa_signal_and_relaxed(signal_x[i], 1); + } + hsa_signal_store_relaxed(signal_y, -1); + hsa_signal_and_acq_rel(signal_y, 1); + } + return NULL; +} +// Test entry point: creates the signals with value 1, runs T1 and T2 on two pthreads, joins them, then destroys everything. Returns 0. +int test_signal_and_acq_rel_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS ==
status); + + hsa_signal_t signal_x[NUM_SIGNAL], signal_y; + // Set value of signal_x and signal_y to 1 + hsa_signal_value_t initial_value = 1; + int i; + for (i = 0; i < NUM_SIGNAL; ++i) { + status = hsa_signal_create(initial_value, 0, NULL, &signal_x[i]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_create(initial_value, 0, NULL, &signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Prepare data for threads: both threads get the same x array and y signal + param arg[2]; + arg[0].signal_x = signal_x; + arg[0].signal_y = signal_y; + arg[1].signal_x = signal_x; + arg[1].signal_y = signal_y; + + pthread_t id[2]; + pthread_create(&id[0], NULL, T1, &arg[0]); + pthread_create(&id[1], NULL, T2, &arg[1]); + + pthread_join(id[0], NULL); + pthread_join(id[1], NULL); + + // Destroy signals + for (i = 0; i < NUM_SIGNAL; ++i) { + status = hsa_signal_destroy(signal_x[i]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_destroy(signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_and_acq_rel_ordering_transitive.c b/src/core/signals/test_signal_and_acq_rel_ordering_transitive.c new file mode 100644 index 0000000..ef5d02e --- /dev/null +++ b/src/core/signals/test_signal_and_acq_rel_ordering_transitive.c @@ -0,0 +1,243 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_and_acq_rel_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_and_acq_rel API + * enforces transitive memory ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to have their first bit set. + * 2) Create two control signals, denoted by y and z. 
The initial + * y signal value should have its last bit set and the z signal + * value should be 0. + * 3) Start one thread that + * a) Uses signal_load_relaxed to load the value of y in a loop, stopping + * when the value has the last bit set. + * b) Uses signal_and_acq_rel to set the value of y to 0. + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting each value to have the last bit set. + * d) Replaces the x values by using signal_store_relaxed to set all bits, + * and then uses signal_and_relaxed to mask out all bits but the first. + * e) Replaces the value of y by using signal_store_relaxed to set all bits, + * and then uses signal_and_acq_rel to mask out all bits but the first. + * f) Starts over. + * g) If it detects that the value of y is -2, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of z is set, not the last. It also operates on signal z, + * not y. + * 5) Start a third thread that + * a) Waits until y has its first bit set using signal_wait_acquire. + * c) Changes the value of z, using signal_store_relaxed to set all + * bits and then signal_and_release to mask out all but the first. + * e) Waits until z has only its last bit set with signal_wait_acquire. + * f) Changes the value of y, using signal_store_relaxed to set all bits + * and then signal_and_release to mask out all but the last. + * g) Starts over. + * h) After a set number of iterations the third thread should set + * both y and z signal values to -2 and terminate. + * 6) Let all three threads run for thousands of iterations. + * + * Expected Results: For each cycle all memory operations that occurred in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa.
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; + +void test_signal_and_acq_rel_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // loop until last bit of y has been set or y = -1 + while ((y_val = hsa_signal_load_relaxed(y)) != LAST_BIT) + if (y_val == -2) return; + + hsa_signal_and_acq_rel(y, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // only last bit of every x should be set + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == LAST_BIT, "only last bit of x value should be set\n"); + + // change first bit of x to 1 + hsa_signal_store_relaxed(x[ii], ALL_BIT); + hsa_signal_and_relaxed(x[ii], FIRST_BIT); + } + + // set first bit of y + hsa_signal_store_relaxed(y, ALL_BIT); + hsa_signal_and_acq_rel(y, FIRST_BIT); + } + return; +} + + +void test_signal_and_acq_rel_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // loop until first bit of z has been set or z = -1 + while ((z_val = hsa_signal_load_relaxed(z)) != FIRST_BIT) + if (z_val == -2) return; + + hsa_signal_and_acq_rel(z, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // only first bit of every x should be set + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == FIRST_BIT, "only first bit of x value should be set\n"); + + // change last bit of x + hsa_signal_store_relaxed(x[ii], ALL_BIT); + hsa_signal_and_relaxed(x[ii], LAST_BIT); + } + + // set last bit of z + hsa_signal_store_relaxed(z, ALL_BIT); + hsa_signal_and_acq_rel(z, LAST_BIT); + } + return; +} + + +void test_signal_and_acq_rel_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // wait until first bit of y has been set up + hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, FIRST_BIT, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set first bit of z + hsa_signal_store_relaxed(z, ALL_BIT); + 
hsa_signal_and_release(z, FIRST_BIT); + + // wait until z holds only the last bit (written by t2) + hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, LAST_BIT, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set all bits of y, then mask so only the last bit remains (releases t1) + hsa_signal_store_relaxed(y, ALL_BIT); + hsa_signal_and_release(y, LAST_BIT); + } + + // wait for t1 to finish its final cycle (y == FIRST_BIT), then store -2 in y and z so t1 and t2 exit + while (hsa_signal_load_relaxed(y) != FIRST_BIT); + hsa_signal_store_release(y, -2); + hsa_signal_store_release(z, -2); + return; +} + +int test_signal_and_acq_rel_ordering_transitive() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize every x value with only the last bit set + status = hsa_signal_create(LAST_BIT, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y with only the last bit set so t1 runs first + status = hsa_signal_create(LAST_BIT, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // initialize z to 0 + status = hsa_signal_create(0, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(3); + + // add test func one to the test group + test_group_add(test, test_signal_and_acq_rel_ordering_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_and_acq_rel_ordering_t2, NULL, 1); + + // add test func three to the test_group + test_group_add(test, test_signal_and_acq_rel_ordering_t3, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait for all test functions to finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_signal_destroy(z); + 
ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_and_acquire_release_ordering.c b/src/core/signals/test_signal_and_acquire_release_ordering.c new file mode 100644 index 0000000..2edac07 --- /dev/null +++ b/src/core/signals/test_signal_and_acquire_release_ordering.c @@ -0,0 +1,182 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_and_acquire_release_ordering + * + * Purpose: + * Verify that the hsa_signal_and_acquire and hsa_signal_and_release + * APIs enforce correct memory ordering. + * + * Description: + * + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized to have + * the last bit set. + * Create a control signal, denoted by y, also initialized to have only + * the last bit set. + * Start one thread that + * a) Checks the value of y in a loop using hsa_signal_cas_relaxed using + * 0 as the exchange value. + * b) When the value of y has only the last bit set, the thread stops looping, and + * c) Sets the value back to 0 using hsa_signal_and_acquire. + * d) Checks all of the x signal values with the signal_load_relaxed + * API, expecting all x values to have their last bit set. + * e) Replaces the x values by using signal_store_relaxed to set all bits, + * and then uses signal_and_relaxed to mask out all bits but the first. + * f) Replaces the value of y by using signal_store_relaxed to set all bits, + * and then uses signal_and_release to mask out all bits but the first. + * g) Starts over. + * Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of y is set, not the last. + * Let both threads run for 32K iterations. 
+ * + */ + +#include +#include +#include +#include "config.h" + +typedef struct { + volatile hsa_signal_t* signal_x; + volatile hsa_signal_t signal_y; +} param; + +static void* test_signal_and_acquire_release_t1(void* arg) { + param* param_ptr = (param*)arg; + volatile hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = param_ptr->signal_y; + + hsa_signal_value_t value; + int jj; + for (jj = 0; jj < NUM_ITERATION; ++jj) { + while (1 != hsa_signal_cas_relaxed(signal_y, 1, 0)); + hsa_signal_and_acquire(signal_y, 0); + + int ii; + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + value = hsa_signal_load_relaxed(signal_x[ii]); + ASSERT_MSG(1 == value, "The value of signal_x[%d] is not equal to 1!\n", ii); + // Set all bits of each x and mask out all bits but first + hsa_signal_store_relaxed(signal_x[ii], -1); + hsa_signal_and_relaxed(signal_x[ii], FIRST_BIT); + } + hsa_signal_store_relaxed(signal_y, -1); + hsa_signal_and_release(signal_y, FIRST_BIT); + } + return arg; +} + +static void* test_signal_and_acquire_release_t2(void* arg) { + param* param_ptr = (param*)arg; + volatile hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = param_ptr->signal_y; + + hsa_signal_value_t value; + int jj; + for (jj = 0; jj < NUM_ITERATION; ++jj) { + while (FIRST_BIT != hsa_signal_cas_relaxed(signal_y, FIRST_BIT, 0)); + hsa_signal_and_acquire(signal_y, 0); + + int ii; + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + value = hsa_signal_load_relaxed(signal_x[ii]); + ASSERT_MSG(FIRST_BIT == value, "The value of x[%d] is not equal to the min negative value!\n", ii); + // Set all bits of each x and mask out all bits but last + hsa_signal_store_relaxed(signal_x[ii], -1); + hsa_signal_and_relaxed(signal_x[ii], 1); + } + hsa_signal_store_relaxed(signal_y, -1); + hsa_signal_and_release(signal_y, 1); + } + return arg; +} + +int test_signal_and_acquire_release_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t 
signal_x[NUM_SIGNAL], signal_y; + // Set value of signal_x and signal_y to 1 + hsa_signal_value_t initial_value = 1; + int ii; + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + status = hsa_signal_create(initial_value, 0, NULL, &signal_x[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_create(initial_value, 0, NULL, &signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Prepare data for threads + param arg[2]; + arg[0].signal_x = signal_x; + arg[0].signal_y = signal_y; + arg[1].signal_x = signal_x; + arg[1].signal_y = signal_y; + + pthread_t id[2]; + pthread_create(&id[0], NULL, test_signal_and_acquire_release_t1, &arg[0]); + pthread_create(&id[1], NULL, test_signal_and_acquire_release_t2, &arg[1]); + + pthread_join(id[0], NULL); + pthread_join(id[1], NULL); + + // Destroy signal + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + status = hsa_signal_destroy(signal_x[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_destroy(signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_and_acquire_release_ordering_transitive.c b/src/core/signals/test_signal_and_acquire_release_ordering_transitive.c new file mode 100644 index 0000000..59aa9cf --- /dev/null +++ b/src/core/signals/test_signal_and_acquire_release_ordering_transitive.c @@ -0,0 +1,242 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_and_acquire_release_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_and_acquire and + * hsa_signal_and_release APIs enforce transitive memory + * ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to have their last bit set. 
+ * 2) Create two control signals, denoted by y and z. The initial + * y signal value should have its last bit set and the z signal + * value should be 0. + * 3) Start one thread that + * a) Uses signal_load_relaxed to load the value of y in a loop, stopping + * when the value has the last bit set. + * b) Uses signal_and_acquire to set the value of y to 0. + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting each value to have the last bit set. + * d) Replaces the x values by using signal_store_relaxed to set all bits, + * and then uses signal_and_relaxed to mask out all bits but the first. + * e) Replaces the value of y by using signal_store_relaxed to set all bits, + * and then uses signal_and_release to mask out all bits but the first. + * f) Starts over. + * g) If it detects that the value of y is -2, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of z is set, not the last. It also operates on signal z, + * not y. + * 5) Start a third thread that + * a) Waits until y has its first bit set using signal_wait_acquire. + * b) Changes the value of z, using signal_store_relaxed to set all + * bits and then signal_and_release to mask out all but the first. + * c) Waits until z has only its last bit set with signal_wait_acquire. + * d) Changes the value of y, using signal_store_relaxed to set all bits + * and then signal_and_release to mask out all but the last. + * e) Starts over. + * f) After a set number of iterations the third thread should set + * both y and z signal values to -2 and terminate. + * 6) Let all three threads run for thousands of iterations. + * + * Expected Results: For each cycle all memory operations that occurred in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; + +void test_signal_and_acquire_release_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // spin until only the last bit of y is set; exit when y holds the -2 termination value + while ((y_val = hsa_signal_load_relaxed(y)) != LAST_BIT) + if (y_val == -2) return; + + hsa_signal_and_acquire(y, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // only last bit of every x should be set + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == LAST_BIT, "only last bit of x value should be set\n"); + + // set all bits of x, then mask so only the first bit remains + hsa_signal_store_relaxed(x[ii], ALL_BIT); + hsa_signal_and_relaxed(x[ii], FIRST_BIT); + } + + // set all bits of y, then mask so only the first bit remains (the value t3 waits for) + hsa_signal_store_relaxed(y, ALL_BIT); + hsa_signal_and_release(y, FIRST_BIT); + } + return; +} + +void test_signal_and_acquire_release_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // spin until only the first bit of z is set; exit when z holds the -2 termination value + while ((z_val = hsa_signal_load_relaxed(z)) != FIRST_BIT) + if (z_val == -2) return; + + hsa_signal_and_acquire(z, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // only first bit of every x should be set + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == FIRST_BIT, "only first bit of x value should be set\n"); + + // set all bits of x, then mask so only the last bit remains + hsa_signal_store_relaxed(x[ii], ALL_BIT); + hsa_signal_and_relaxed(x[ii], LAST_BIT); + } + + // set all bits of z, then mask so only the last bit remains (the value t3 waits for) + hsa_signal_store_relaxed(z, ALL_BIT); + hsa_signal_and_release(z, LAST_BIT); + } + return; +} + +void test_signal_and_acquire_release_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // wait until y holds only the first bit (written by t1) + hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, FIRST_BIT, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set all bits of z, then mask so only the first bit remains (releases t2) + hsa_signal_store_relaxed(z, ALL_BIT); + 
hsa_signal_and_release(z, FIRST_BIT); + + // wait until z holds only the last bit (written by t2) + hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, LAST_BIT, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set all bits of y, then mask so only the last bit remains (releases t1) + hsa_signal_store_relaxed(y, ALL_BIT); + hsa_signal_and_release(y, LAST_BIT); + } + + // wait for t1 to finish its final cycle (y == FIRST_BIT), then store -2 in y and z so t1 and t2 exit + while (hsa_signal_load_relaxed(y) != FIRST_BIT); + hsa_signal_store_release(y, -2); + hsa_signal_store_release(z, -2); + return; +} + +int test_signal_and_acquire_release_ordering_transitive() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize every x value with only the last bit set + status = hsa_signal_create(LAST_BIT, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y with only the last bit set so t1 runs first + status = hsa_signal_create(LAST_BIT, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // initialize z to 0 + status = hsa_signal_create(0, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(3); + + // add test func one to the test group + test_group_add(test, test_signal_and_acquire_release_ordering_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_and_acquire_release_ordering_t2, NULL, 1); + + // add test func three to the test_group + test_group_add(test, test_signal_and_acquire_release_ordering_t3, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait for all test functions to finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = 
hsa_signal_destroy(z); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_and_atomic.c b/src/core/signals/test_signal_and_atomic.c new file mode 100644 index 0000000..cc262a3 --- /dev/null +++ b/src/core/signals/test_signal_and_atomic.c @@ -0,0 +1,753 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_and_atomic + * + * Purpose: + * Verify atomicity feature of signal operation + * + * Description: + * + * 1) Create a signal, set every bit of signal to be 1, create + * 4 threads + * a) Each thread applies a hsa_signal_and_acquire operation on the signal value. + * b) Thread 0 uses a rotating mask of ...1110, ..1110., .1110.., 1110..., shifting + * the 0 16 bits per application. + * c) Thread 1 uses a rotating mask of ...1101, ..1101., .1101.., 1101..., shifting + * the 0 16 bits per application. + * d) Thread 2 uses a rotating mask of ...1011, ..1011., .1011.., 1011..., shifting + * the 0 16 bits per application. + * e) Thread 3 uses a rotating mask of ...0111, ..0111., .0111.., 0111..., shifting + * the 0 16 bits per application. + * After all threads finish, check if the final value is 0 and repeat 1000000 times. + * + * 2) Create a signal, set every bit of signal to be 1, create + * 4 threads + * a) Each thread applies a hsa_signal_and_release operation on the signal value. + * b) Thread 0 uses a rotating mask of ...1110, ..1110., .1110.., 1110..., shifting + * the 0 16 bits per application. + * c) Thread 1 uses a rotating mask of ...1101, ..1101., .1101.., 1101..., shifting + * the 0 16 bits per application. + * d) Thread 2 uses a rotating mask of ...1011, ..1011., .1011.., 1011..., shifting + * the 0 16 bits per application. 
+ * e) Thread 3 uses a rotating mask of ...0111, ..0111., .0111.., 0111..., shifting + * the 0 16 bits per application. + * After all threads finish, check if the final value is 0 and repeat 1000000 times. + * + * 3) Create a signal, set every bit of signal to be 1, create + * 4 threads + * a) Each thread applies a hsa_signal_and_relaxed operation on the signal value. + * b) Thread 0 uses a rotating mask of ...1110, ..1110., .1110.., 1110..., shifting + * the 0 16 bits per application. + * c) Thread 1 uses a rotating mask of ...1101, ..1101., .1101.., 1101..., shifting + * the 0 16 bits per application. + * d) Thread 2 uses a rotating mask of ...1011, ..1011., .1011.., 1011..., shifting + * the 0 16 bits per application. + * e) Thread 3 uses a rotating mask of ...0111, ..0111., .0111.., 0111..., shifting + * the 0 16 bits per application. + * After all threads finish, check if the final value is 0 and repeat 1000000 times. + * + * 4) Create a signal, set every bit of signal to be 1, create + * 4 threads + * a) Each thread applies a hsa_signal_and_acq_rel operation on the signal value. + * b) Thread 0 uses a rotating mask of ...1110, ..1110., .1110.., 1110..., shifting + * the 0 16 bits per application. + * c) Thread 1 uses a rotating mask of ...1101, ..1101., .1101.., 1101..., shifting + * the 0 16 bits per application. + * d) Thread 2 uses a rotating mask of ...1011, ..1011., .1011.., 1011..., shifting + * the 0 16 bits per application. + * e) Thread 3 uses a rotating mask of ...0111, ..0111., .0111.., 0111..., shifting + * the 0 16 bits per application. + * After all threads finish, check if the final value is 0 and repeat 1000000 times. 
+ * + */ + +#include +#include +#include +#include +#include "config.h" + +typedef struct test_group test_group; + +// Define a structure to pass parameter to child function +typedef struct { + volatile hsa_signal_t signal_handle; + volatile int num; +} param; + +static void child_func_acquire(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + param* param_ptr = (param*)data; + hsa_signal_t signal_handle = param_ptr->signal_handle; + int num = param_ptr->num; + + // Different thread behaves differently + switch (num) { + case 0: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xfff0ffffffffffff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_acquire(signal_handle, signal_value); + // signal_value is signed integer, right shift will add 1 to highest bit + signal_value = signal_value >> 16; + } + #else + signal_value = 0xfff0ffff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_acquire(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #endif + break; + } + case 1: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xff0fffffffffffff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_acquire(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #else + signal_value = 0xff0fffff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_acquire(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #endif + break; + } + case 2: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xf0ffffffffffffff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_acquire(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #else + signal_value = 0xf0ffffff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_acquire(signal_handle, signal_value); + signal_value = 
signal_value >> 16; + } + #endif + break; + } + case 3: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xffffffffffff0fff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_acquire(signal_handle, signal_value); + signal_value = signal_value << 16; + signal_value = signal_value | 0x000000000000ffff; + } + #else + signal_value = 0xffff0fff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_acquire(signal_handle, signal_value); + signal_value = signal_value << 16; + signal_value = signal_value | 0x0000ffff; + } + #endif + break; + } + default: + break; + } + return; +} + +static void child_func_release(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + param* param_ptr = (param*)data; + hsa_signal_t signal_handle = param_ptr->signal_handle; + int num = param_ptr->num; + + // Different thread behaves differently + switch (num) { + case 0: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xfff0ffffffffffff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_release(signal_handle, signal_value); + // signal_value is signed integer, right shift will add 1 to highest bit + signal_value = signal_value >> 16; + } + #else + signal_value = 0xfff0ffff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_release(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #endif + break; + } + case 1: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xff0fffffffffffff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_release(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #else + signal_value = 0xff0fffff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_release(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #endif + break; + } + case 2: + { + hsa_signal_value_t 
signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xf0ffffffffffffff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_release(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #else + signal_value = 0xf0ffffff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_release(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #endif + break; + } + case 3: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xffffffffffff0fff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_release(signal_handle, signal_value); + signal_value = signal_value << 16; + signal_value = signal_value | 0x000000000000ffff; + } + #else + signal_value = 0xffff0fff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_release(signal_handle, signal_value); + signal_value = signal_value << 16; + signal_value = signal_value | 0x0000ffff; + } + #endif + break; + } + default: + break; + } + return; +} + +static void child_func_relaxed(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + param* param_ptr = (param*)data; + hsa_signal_t signal_handle = param_ptr->signal_handle; + int num = param_ptr->num; + + // Different thread behaves differently + switch (num) { + case 0: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xfff0ffffffffffff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_relaxed(signal_handle, signal_value); + // signal_value is signed integer, right shift will add 1 to highest bit + signal_value = signal_value >> 16; + } + #else + signal_value = 0xfff0ffff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_relaxed(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #endif + break; + } + case 1: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xff0fffffffffffff; + for 
(ii = 0; ii < 4; ++ii) { + hsa_signal_and_relaxed(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #else + signal_value = 0xff0fffff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_relaxed(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #endif + break; + } + case 2: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xf0ffffffffffffff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_relaxed(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #else + signal_value = 0xf0ffffff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_relaxed(signal_handle, signal_value); + signal_value = signal_value >> 16; + } + #endif + break; + } + case 3: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xffffffffffff0fff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_relaxed(signal_handle, signal_value); + signal_value = signal_value << 16; + signal_value = signal_value | 0x000000000000ffff; + } + #else + signal_value = 0xffff0fff; + for (ii = 0; ii < 2; ++ii) { + hsa_signal_and_relaxed(signal_handle, signal_value); + signal_value = signal_value << 16; + signal_value = signal_value | 0x0000ffff; + } + #endif + break; + } + default: + break; + } + return; +} + +static void child_func_acq_rel(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + param* param_ptr = (param*)data; + hsa_signal_t signal_handle = param_ptr->signal_handle; + int num = param_ptr->num; + + // Different thread behaves differently + switch (num) { + case 0: + { + hsa_signal_value_t signal_value; + int ii; + #ifdef HSA_LARGE_MODEL + signal_value = 0xfff0ffffffffffff; + for (ii = 0; ii < 4; ++ii) { + hsa_signal_and_acq_rel(signal_handle, signal_value); + // signal_value is signed integer, right shift will add 1 to highest bit + 
signal_value = signal_value >> 16;
+            }
+            #else
+            signal_value = 0xfff0ffff;
+            for (ii = 0; ii < 2; ++ii) {
+                hsa_signal_and_acq_rel(signal_handle, signal_value);
+                signal_value = signal_value >> 16;
+            }
+            #endif
+            break;
+        }
+        case 1:
+        {
+            hsa_signal_value_t signal_value;
+            int ii;
+            #ifdef HSA_LARGE_MODEL
+            signal_value = 0xff0fffffffffffff;
+            for (ii = 0; ii < 4; ++ii) {
+                hsa_signal_and_acq_rel(signal_handle, signal_value);
+                signal_value = signal_value >> 16;
+            }
+            #else
+            signal_value = 0xff0fffff;
+            for (ii = 0; ii < 2; ++ii) {
+                hsa_signal_and_acq_rel(signal_handle, signal_value);
+                signal_value = signal_value >> 16;
+            }
+            #endif
+            break;
+        }
+        case 2:
+        {
+            hsa_signal_value_t signal_value;
+            int ii;
+            #ifdef HSA_LARGE_MODEL
+            signal_value = 0xf0ffffffffffffff;
+            for (ii = 0; ii < 4; ++ii) {
+                hsa_signal_and_acq_rel(signal_handle, signal_value);
+                signal_value = signal_value >> 16;
+            }
+            #else
+            signal_value = 0xf0ffffff;
+            for (ii = 0; ii < 2; ++ii) {
+                hsa_signal_and_acq_rel(signal_handle, signal_value);
+                signal_value = signal_value >> 16;
+            }
+            #endif
+            break;
+        }
+        case 3:
+        {
+            hsa_signal_value_t signal_value;
+            int ii;
+            #ifdef HSA_LARGE_MODEL
+            signal_value = 0xffffffffffff0fff;
+            for (ii = 0; ii < 4; ++ii) {
+                hsa_signal_and_acq_rel(signal_handle, signal_value);
+                signal_value = signal_value << 16;
+                signal_value = signal_value | 0x000000000000ffff;
+            }
+            #else
+            signal_value = 0xffff0fff;
+            for (ii = 0; ii < 2; ++ii) {
+                hsa_signal_and_acq_rel(signal_handle, signal_value);
+                signal_value = signal_value << 16;
+                signal_value = signal_value | 0x0000ffff;
+            }
+            #endif
+            break;
+        }
+        default:
+            break;
+    }
+    return;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1
+ *
+ * Creates a signal with every bit set, registers child_func_acquire four
+ * times (num = 0..3) with a test group and runs the group for OP_COUNT
+ * rounds. After each round the signal value must be 0; it is then reset
+ * to all ones for the next round.
+ *
+ * @Return:
+ * int (0 when the end of the test is reached)
+ *
+ */
+
+int test_signal_and_atomic_acquire() {
+    hsa_status_t status;
+
+    // Open HsaRt
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    // Set all bits of initial value to 1
+    hsa_signal_value_t initial_value = -1;
+
+    // Create a new signal
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create test group
+    test_group* group_ptr = NULL;
+    group_ptr = test_group_create(4);
+    ASSERT(NULL != group_ptr);
+    int ii;
+    // Set parameter structure for each thread
+    param* param_ptr = (param*)malloc(sizeof(param)*4);
+    // Fail the test instead of dereferencing NULL when the allocation fails
+    ASSERT(NULL != param_ptr);
+    for (ii = 0; ii < 4; ++ii) {
+        param_ptr[ii].signal_handle = signal_handle;
+        param_ptr[ii].num = ii;
+    }
+    // Add tests
+    for (ii = 0; ii < 4; ++ii) {
+        test_group_add(group_ptr, child_func_acquire, param_ptr+ii, 1);
+    }
+    // Create threads for tests
+    test_group_thread_create(group_ptr);
+    // Set run flag to let multi-thread running for OP_COUNT iterations
+    for (ii = 0; ii < OP_COUNT; ++ii) {
+        test_group_start(group_ptr);
+        test_group_wait(group_ptr);
+        // Here, we can use load acquire or relaxed, but store must be release to make sure every bit of signal is set before next loop
+        hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+        hsa_signal_store_release(signal_handle, -1);
+        ASSERT_MSG(0 == loaded_value, "Signal value is not 0 which is expected!\n");
+    }
+    // Exit current test group
+    test_group_exit(group_ptr);
+    test_group_destroy(group_ptr);
+    // Release the per-thread parameters once the group that used them is destroyed
+    free(param_ptr);
+    // Destroy the resource
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #2
+ *
+ * Same flow as the acquire variant, but the group runs child_func_release:
+ * four registrations (num = 0..3), OP_COUNT rounds, and the signal value
+ * must be 0 after every round before it is reset to all ones.
+ *
+ * @Return:
+ * int (0 when the end of the test is reached)
+ *
+ */
+
+int test_signal_and_atomic_release() {
+    hsa_status_t status;
+
+    // Open HsaRt
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    // Set all bits of initial value to 1
+    hsa_signal_value_t initial_value = -1;
+
+    // Create a new signal
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create test group with size of 4
+    test_group* group_ptr = NULL;
+    group_ptr = test_group_create(4);
+    ASSERT(NULL != group_ptr);
+    int ii;
+    // Set parameter structure for each thread
+    param* param_ptr = (param*)malloc(sizeof(param)*4);
+    // Fail the test instead of dereferencing NULL when the allocation fails
+    ASSERT(NULL != param_ptr);
+    for (ii = 0; ii < 4; ++ii) {
+        param_ptr[ii].signal_handle = signal_handle;
+        param_ptr[ii].num = ii;
+    }
+    // Add tests
+    for (ii = 0; ii < 4; ++ii) {
+        test_group_add(group_ptr, child_func_release, param_ptr+ii, 1);
+    }
+    // Create threads for tests
+    test_group_thread_create(group_ptr);
+    // Set run flag to let multi-thread running for OP_COUNT iterations
+    for (ii = 0; ii < OP_COUNT; ++ii) {
+        test_group_start(group_ptr);
+        test_group_wait(group_ptr);
+        // Here, we can use load acquire or relaxed, but store must be release to make sure every bit of signal is set before next loop
+        hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+        hsa_signal_store_release(signal_handle, -1);
+        ASSERT_MSG(0 == loaded_value, "Signal value is not 0 which is expected!\n");
+    }
+    // Exit current test group
+    test_group_exit(group_ptr);
+    test_group_destroy(group_ptr);
+    // Release the per-thread parameters once the group that used them is destroyed
+    free(param_ptr);
+    // Destroy the resource
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #3
+ *
+ * Same flow as the acquire variant, but the group runs child_func_relaxed:
+ * four registrations (num = 0..3), OP_COUNT rounds, and the signal value
+ * must be 0 after every round before it is reset to all ones.
+ *
+ * @Return:
+ * int (0 when the end of the test is reached)
+ *
+ */
+
+int test_signal_and_atomic_relaxed() {
+    hsa_status_t status;
+
+    // Open HsaRt
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    // Set all bits of initial value to 1
+    hsa_signal_value_t initial_value = -1;
+
+    // Create a new signal
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create test group with size of 4
+    test_group* group_ptr = NULL;
+    group_ptr = test_group_create(4);
+    ASSERT(NULL != group_ptr);
+    int ii;
+    // Set parameter structure for each thread
+    param* param_ptr = (param*)malloc(sizeof(param)*4);
+    // Fail the test instead of dereferencing NULL when the allocation fails
+    ASSERT(NULL != param_ptr);
+    for (ii = 0; ii < 4; ++ii) {
+        param_ptr[ii].signal_handle = signal_handle;
+        param_ptr[ii].num = ii;
+    }
+    // Add tests
+    for (ii = 0; ii < 4; ++ii) {
+        test_group_add(group_ptr, child_func_relaxed, param_ptr+ii, 1);
+    }
+    // Create threads for tests
+    test_group_thread_create(group_ptr);
+    // Set run flag to let multi-thread running for OP_COUNT iterations
+    for (ii = 0; ii < OP_COUNT; ++ii) {
+        test_group_start(group_ptr);
+        test_group_wait(group_ptr);
+        // Here, we can use load acquire or relaxed, but store must be release to make sure every bit of signal is set before next loop
+        hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+        hsa_signal_store_release(signal_handle, -1);
+        ASSERT_MSG(0 == loaded_value, "Signal value is not 0 which is expected!\n");
+    }
+    // Exit current test group
+    test_group_exit(group_ptr);
+    test_group_destroy(group_ptr);
+    // Release the per-thread parameters once the group that used them is destroyed
+    free(param_ptr);
+    // Destroy the resource
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4
+ *
+ * Same flow as the acquire variant, but the group runs child_func_acq_rel:
+ * four registrations (num = 0..3), OP_COUNT rounds, and the signal value
+ * must be 0 after every round before it is reset to all ones.
+ *
+ * @Return:
+ * int (0 when the end of the test is reached)
+ *
+ */
+
+int test_signal_and_atomic_acq_rel() {
+    hsa_status_t status;
+
+    // Open HsaRt
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    // Set all bits of initial value to 1
+    hsa_signal_value_t initial_value = -1;
+
+    // Create a new signal
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create test group with size of 4
+    test_group* group_ptr = NULL;
+    group_ptr = test_group_create(4);
+    ASSERT(NULL != group_ptr);
+    int ii;
+    // Set parameter structure for each thread
+    param* param_ptr = (param*)malloc(sizeof(param)*4);
+    // Fail the test instead of dereferencing NULL when the allocation fails
+    ASSERT(NULL != param_ptr);
+    for (ii = 0; ii < 4; ++ii) {
+        param_ptr[ii].signal_handle = signal_handle;
+        param_ptr[ii].num = ii;
+    }
+    // Add tests
+    for (ii = 0; ii < 4; ++ii) {
+        test_group_add(group_ptr, child_func_acq_rel, param_ptr+ii, 1);
+    }
+    // Create threads for tests
+    test_group_thread_create(group_ptr);
+    // Set run flag to let multi-thread running for OP_COUNT iterations.
+    for (ii = 0; ii < OP_COUNT; ++ii) {
+        test_group_start(group_ptr);
+        test_group_wait(group_ptr);
+        // Here, we can use load acquire or relaxed, but store must be release to make sure every bit of signal is set before next loop
+        hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+        hsa_signal_store_release(signal_handle, -1);
+        ASSERT_MSG(0 == loaded_value, "Signal value is not 0 which is expected!\n");
+    }
+    // Exit current test group
+    test_group_exit(group_ptr);
+    test_group_destroy(group_ptr);
+    // Release the per-thread parameters once the group that used them is destroyed
+    free(param_ptr);
+    // Destroy the resource
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/signals/test_signal_and_release_ordering.c b/src/core/signals/test_signal_and_release_ordering.c
new file mode 100644
index 0000000..af9b74a
--- /dev/null
+++ b/src/core/signals/test_signal_and_release_ordering.c
@@ -0,0 +1,182 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_and_release_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_and_release API enforces + * correct memory ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. All the signal values should be initialized to have + * the last bit set. + * 2) Create a control signal, denoted by y, also initialized to have only + * the last bit set. 
+ * 3) Start one thread that
+ *    a) Checks the value of y in a loop using hsa_signal_cas_acquire using
+ *       0 as the exchange value.
+ *    b) When the value of y has only the last bit set, the thread stops looping, and
+ *    c) Checks all of the x signal values with the signal_load_relaxed
+ *       API, expecting all x values to have their last bit set.
+ *    d) Replaces the x values by using signal_"store"_relaxed to set all bits,
+ *       and then uses signal_and_relaxed to mask out all bits but the first.
+ *    e) Replaces the value of y by using signal_"store"_relaxed to set all bits,
+ *       and then uses signal_and_release to mask out all bits but the first.
+ *    f) Starts over.
+ * 4) Start a second thread that does exactly the same set of operations,
+ *    but sets the last bit for all values, not the first, and triggers when
+ *    the first bit of y is set, not the last.
+ * 5) Let both threads run for 32K iterations.
+ *
+ * Expected Results: For each cycle all memory operations
+ * that occurred in thread one should be appropriately ordered in thread two after
+ * the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+
+// Signals handed to both test threads.
+typedef struct {
+    volatile hsa_signal_t* signal_x;  // array of 1024 data signals
+    volatile hsa_signal_t signal_y;   // control signal passed between the threads
+} param;
+
+
+// Thread one: waits for y == 1 (taking it to 0 with a CAS-acquire), checks
+// that every x is 1, rewrites every x to FIRST_BIT and then publishes
+// y = FIRST_BIT with an AND-release. Repeats NUM_ITERATION times.
+// Returns its argument unchanged.
+static void* test_signal_and_release_t1(void* arg) {
+    param* param_ptr = (param*)arg;
+    volatile hsa_signal_t* signal_x = param_ptr->signal_x;
+    hsa_signal_t signal_y = param_ptr->signal_y;
+
+    hsa_signal_value_t value;
+    int jj;
+    for (jj = 0; jj < NUM_ITERATION; ++jj) {
+        // Spin until this thread owns the turn: y goes from 1 to 0
+        while (1 != hsa_signal_cas_acquire(signal_y, 1, 0));
+
+        int ii;
+        for (ii = 0; ii < 1024; ++ii) {
+            value = hsa_signal_load_relaxed(signal_x[ii]);
+            ASSERT_MSG(1 == value, "The value of signal_x[%d] is not equal to 1!\n", ii);
+            // Set all bits of each x and mask out all bits but first
+            hsa_signal_store_relaxed(signal_x[ii], -1);
+            hsa_signal_and_relaxed(signal_x[ii], FIRST_BIT);
+        }
+        // Hand the turn to thread two; the AND-release is the operation under test
+        hsa_signal_store_relaxed(signal_y, -1);
+        hsa_signal_and_release(signal_y, FIRST_BIT);
+    }
+    return arg;
+}
+
+// Thread two: mirror image of thread one. Waits for y == FIRST_BIT, checks
+// that every x is FIRST_BIT, rewrites every x to 1 and publishes y = 1
+// with an AND-release. Repeats NUM_ITERATION times.
+// Returns its argument unchanged.
+static void* test_signal_and_release_t2(void* arg) {
+    param* param_ptr = (param*)arg;
+    volatile hsa_signal_t* signal_x = param_ptr->signal_x;
+    hsa_signal_t signal_y = param_ptr->signal_y;
+
+    hsa_signal_value_t value;
+    int jj;
+    for (jj = 0; jj < NUM_ITERATION; ++jj) {
+        // Spin until this thread owns the turn: y goes from FIRST_BIT to 0
+        while (FIRST_BIT != hsa_signal_cas_acquire(signal_y, FIRST_BIT, 0));
+
+        int ii;
+        for (ii = 0; ii < 1024; ++ii) {
+            value = hsa_signal_load_relaxed(signal_x[ii]);
+            ASSERT_MSG(FIRST_BIT == value, "The value of x[%d] is not equal to the min negative value!\n", ii);
+            // Set all bits of each x and mask out all bits but last
+            hsa_signal_store_relaxed(signal_x[ii], -1);
+            hsa_signal_and_relaxed(signal_x[ii], 1);
+        }
+        // Hand the turn back to thread one
+        hsa_signal_store_relaxed(signal_y, -1);
+        hsa_signal_and_release(signal_y, 1);
+    }
+    return arg;
+}
+
+// Test entry point: creates 1024 x signals and the y control signal (all
+// initialised to 1), runs the two threads above to completion and destroys
+// the signals again. Returns 0 when the end of the test is reached.
+int test_signal_and_release_ordering() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_x[1024], signal_y;
+    // Set value of signal_x and signal_y to 1
+    hsa_signal_value_t initial_value = 1;
+    int ii;
+    for (ii = 0; ii < 1024; ++ii) {
+        status = hsa_signal_create(initial_value, 0, NULL, &signal_x[ii]);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_y);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Prepare data for threads
+    param arg[2];
+    arg[0].signal_x = signal_x;
+    arg[0].signal_y = signal_y;
+    arg[1].signal_x = signal_x;
+    arg[1].signal_y = signal_y;
+
+    // A failed create would leave id[] uninitialised, so every pthread
+    // return code is checked before the handle is used.
+    pthread_t id[2];
+    int rc;
+    rc = pthread_create(&id[0], NULL, test_signal_and_release_t1, &arg[0]);
+    ASSERT(0 == rc);
+    rc = pthread_create(&id[1], NULL, test_signal_and_release_t2, &arg[1]);
+    ASSERT(0 == rc);
+
+    rc = pthread_join(id[0], NULL);
+    ASSERT(0 == rc);
+    rc = pthread_join(id[1], NULL);
+    ASSERT(0 == rc);
+
+    // Destroy signal
+    for (ii = 0; ii < 1024; ++ii) {
+        status = hsa_signal_destroy(signal_x[ii]);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+    status = hsa_signal_destroy(signal_y);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/signals/test_signal_cas_acq_rel_ordering.c b/src/core/signals/test_signal_cas_acq_rel_ordering.c
new file mode 100644
index 0000000..6d52687
--- /dev/null
+++ b/src/core/signals/test_signal_cas_acq_rel_ordering.c
@@ -0,0 +1,179 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_cas_acq_rel_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_cas_acq_rel + * API enforces correct memory ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to 2. + * 2) Create a control signal, denoted by y, also initialized + * to 1. 
+ * 3) Start one thread that
+ *    a) Checks the value of y in a loop using hsa_signal_cas_acq_rel using
+ *       0 as the exchange value.
+ *    b) When the value of y is 1, the thread stops looping, and
+ *    c) Checks all of the x signal values with the signal_cas_relaxed
+ *       API, expecting a value of 2, and replacing it with a value of 1.
+ *    d) Sets the value of y to 2, using the signal_cas_acq_rel API.
+ *    e) Starts over.
+ * 4) Start a second thread that does exactly the same set of operations,
+ *    but uses 1 in place of 2 and 2 in place of 1.
+ * 5) Let both threads run for millions of iterations.
+ *
+ * Expected Results: For each cycle, the reported x values for the first thread should be 2
+ * and the reported x values for the second thread should be 1, i.e. all memory operations
+ * that occurred in thread one should be appropriately ordered in thread two after
+ * the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+
+// Control signal (y) and data signals (x). Kept file-local so that the
+// identically named globals of the sibling ordering tests cannot collide
+// with, or silently alias, these when the tests are linked together.
+static hsa_signal_t y;
+static hsa_signal_t x[NUM_X];
+
+// test func one:
+// check y, if y equal to 1, set y to 0, and then check if all x values equal to 2 and set them to 1, and then set y value to 2
+void test_signal_cas_acq_rel_t1(void *data) {
+    int ii, jj;
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) {
+        // change y to 0 if y equals to 1
+        while (hsa_signal_cas_acq_rel(y, 1, 0) != 1);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should equal to 2 and change to 1
+            hsa_signal_value_t sig_val_t1 = hsa_signal_cas_relaxed(x[ii], 2, 1);
+            ASSERT(sig_val_t1 == 2);
+        }
+
+        // change y from 0 to 2 to hand the turn to test func two
+        hsa_signal_cas_acq_rel(y, 0, 2);
+    }
+    return;
+}
+
+
+// test func two:
+// check y, if y equal to 2, set y to 0, and then check if all x values equal to 1 and set them to 2, and then set y value to 1
+void test_signal_cas_acq_rel_t2(void *data) {
+    int ii, jj;
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) {
+        // change y to 0 if y equals to 2
+        while (hsa_signal_cas_acq_rel(y, 2, 0) != 2);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should equal to 1 and change to 2
+            hsa_signal_value_t sig_val_t2 = hsa_signal_cas_relaxed(x[ii], 1, 2);
+            ASSERT(sig_val_t2 == 1);
+        }
+
+        // change y from 0 to 1 to hand the turn back to test func one
+        hsa_signal_cas_acq_rel(y, 0, 1);
+    }
+    return;
+}
+
+// Test entry point: creates the x signals (value 2) and y (value 1), runs
+// the two test funcs in a two-entry test group and destroys the signals.
+// Returns 0 when the end of the test is reached.
+int test_signal_cas_acq_rel_ordering() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < NUM_X; ++ii) {
+        // initialize all x values to 2
+        status = hsa_signal_create(2, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // initialize y to 1
+    status = hsa_signal_create(1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // create test_group; stop here rather than pass NULL to the calls below
+    struct test_group *test = test_group_create(2);
+    ASSERT(NULL != test);
+
+    // add test func one to the test group
+    test_group_add(test, test_signal_cas_acq_rel_t1, NULL, 1);
+
+    // add test func two to the test_group
+    test_group_add(test, test_signal_cas_acq_rel_t2, NULL, 1);
+
+    // create threads for each test
+    test_group_thread_create(test);
+
+    // start test functions
+    test_group_start(test);
+
+    // wait all tests functions finish
+    test_group_wait(test);
+
+    // exit all tests
+    test_group_exit(test);
+
+    // cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < NUM_X; ++ii) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_cas_acq_rel_ordering_transitive.c b/src/core/signals/test_signal_cas_acq_rel_ordering_transitive.c
new file mode 100644
index 0000000..2f3a7df
--- /dev/null
+++ b/src/core/signals/test_signal_cas_acq_rel_ordering_transitive.c
@@ -0,0 +1,228 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+/*
+ * Test Name: signal_cas_acq_rel_ordering_transitive
+ * Scope: Conformance
+ *
+ * Purpose: Verifies that the hsa_signal_cas_acq_rel
+ * API enforces transitive memory
+ * ordering.
+ *
+ * Test Description:
+ * 1) Create several signals and store the handles in an array,
+ *    denoted by x[]. All the signal values should be initialized
+ *    to 2.
+ * 2) Create two control signals, denoted by y and z. The initial
+ *    value of both the y signal and the z signal should be 1.
+ * 3) Start one thread that
+ *    a) Attempts to modify the value of y in a loop using signal_cas_acq_rel
+ *       and using 0 as the exchange value and 1 as the condition.
+ *    b) Checks all of the x signal values with the signal_load_relaxed
+ *       API, expecting a value of 2.
+ *    c) Changes all of the x signal values to 1 using the
+ *       signal_store_relaxed API.
+ *    d) Sets the value of y to 2, using the signal_cas_acq_rel API, expecting
+ *       0 as the condition.
+ *    e) Starts over.
+ *    f) If it detects that the value of y is -1, it terminates.
+ * 4) Start a second thread that does exactly the same set of operations,
+ *    but uses 1 in place of 2 and 2 in place of 1, and operates on signal z
+ *    instead of signal y.
+ * 5) Start a third thread that
+ *    a) Waits until y is 2 using signal_wait_acquire.
+ *    b) Sets the value of z to 2 using signal_cas_acq_rel with
+ *       1 as the condition.
+ *    c) Waits until z is 1 using signal_wait_acquire.
+ *    d) Sets the value of y to 1 using signal_cas_acq_rel with
+ *       2 as the condition.
+ *    e) Starts over.
+ *    f) After a set number of iterations the third thread should set
+ *       both y and z signal values to -1 and terminate.
+ * 6) Let all three threads run for 32K iterations.
+ *
+ * Expected Results: For each cycle all memory operations that occurred in thread one should
+ * be appropriately ordered in thread two after the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+
+// Control signals (y, z) and data signals (x). Kept file-local so that the
+// identically named globals of the sibling ordering tests cannot collide
+// with, or silently alias, these when the tests are linked together.
+static hsa_signal_t y;
+static hsa_signal_t z;
+static hsa_signal_t x[NUM_X];
+
+// Thread one: takes y from 1 to 0, checks that every x is 2, stores 1 into
+// every x and then moves y from 0 to 2. Terminates when it observes y == -1.
+void test_signal_cas_acq_rel_ordering_t1(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t y_val = 0;
+        // loop until y = 1 or y = -1
+        while ((y_val = hsa_signal_cas_acq_rel(y, 1, 0)) != 1)
+            if (y_val == -1) return;
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should equal to 2
+            hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]);
+            // hsa_signal_value_t may be 64 bits wide, so print it as long long
+            ASSERT_MSG(sig_val_t1 == 2, "signal value should be 2, but is %lld\n", (long long)sig_val_t1);
+
+            // change x value to 1
+            hsa_signal_store_relaxed(x[ii], 1);
+        }
+        // set y to 2
+        hsa_signal_cas_acq_rel(y, 0, 2);
+    }
+    return;
+}
+
+
+// Thread two: takes z from 2 to 0, checks that every x is 1, stores 2 into
+// every x and then moves z from 0 to 1. Terminates when it observes z == -1.
+void test_signal_cas_acq_rel_ordering_t2(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t z_val = 0;
+        // loop until z = 2 or z = -1
+        while ((z_val = hsa_signal_cas_acq_rel(z, 2, 0)) != 2)
+            if (z_val == -1) return;
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should equal to 1
+            hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]);
+            // hsa_signal_value_t may be 64 bits wide, so print it as long long
+            ASSERT_MSG(sig_val_t2 == 1, "signal value should be 1, but is %lld\n", (long long)sig_val_t2);
+
+            // change x value to 2
+            hsa_signal_store_relaxed(x[ii], 2);
+        }
+        // set z to 1
+        hsa_signal_cas_acq_rel(z, 0, 1);
+    }
+    return;
+}
+
+// Thread three: relays the turn between thread one (via y) and thread two
+// (via z) for NUM_ITER_MEM_ORD rounds, then stores -1 into both control
+// signals so that the other two threads terminate.
+void test_signal_cas_acq_rel_ordering_t3(void *data) {
+    int ii;
+    for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) {
+        // wait until y = 2; a wait may resume before the condition holds,
+        // so retry until the observed value really is 2
+        while (2 != hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, 2, UINT64_MAX, HSA_WAIT_STATE_BLOCKED));
+
+        // set z from 1 to 2 to start thread two
+        hsa_signal_cas_acq_rel(z, 1, 2);
+
+        // wait until z = 1, again retrying after an early wake-up
+        while (1 != hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED));
+
+        // set y from 2 to 1 to start thread one
+        hsa_signal_cas_acq_rel(y, 2, 1);
+    }
+
+    // wait until t1 has set y to 2 (finished its last round) to avoid deadlock
+    while (hsa_signal_load_relaxed(y) != 2);
+    hsa_signal_store_release(y, -1);
+    hsa_signal_store_release(z, -1);
+    return;
+}
+
+// Test entry point: creates the x signals (value 2) and the y and z control
+// signals (both value 1), runs the three threads above in a test group and
+// destroys the signals. Returns 0 when the end of the test is reached.
+int test_signal_cas_acq_rel_ordering_transitive() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < NUM_X; ++ii) {
+        // initialize all x values to 2
+        status = hsa_signal_create(2, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // initialize y to 1
+    status = hsa_signal_create(1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // initialize z to 1
+    status = hsa_signal_create(1, 0, NULL, &z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // create test_group; stop here rather than pass NULL to the calls below
+    struct test_group *test = test_group_create(3);
+    ASSERT(NULL != test);
+
+    // add test func one to the test group
+    test_group_add(test, test_signal_cas_acq_rel_ordering_t1, NULL, 1);
+
+    // add test func two to the test_group
+    test_group_add(test, test_signal_cas_acq_rel_ordering_t2, NULL, 1);
+
+    // add test func three to the test_group
+    test_group_add(test, test_signal_cas_acq_rel_ordering_t3, NULL, 1);
+
+    // create threads for each test
+    test_group_thread_create(test);
+
+    // start test functions
+    test_group_start(test);
+
+    // wait all tests functions finish
+    test_group_wait(test);
+
+    // exit all tests
+    test_group_exit(test);
+
+    // cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < NUM_X; ++ii) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_signal_destroy(z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_cas_acquire_release_ordering.c b/src/core/signals/test_signal_cas_acquire_release_ordering.c
new file mode 100644
index 0000000..3ab1b82
--- /dev/null
+++ b/src/core/signals/test_signal_cas_acquire_release_ordering.c
@@ -0,0 +1,178 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+/*
+ * Test Name: signal_cas_acquire_release_ordering
+ * Scope: Conformance
+ *
+ * Purpose: Verifies that the hsa_signal_cas_release and
+ * hsa_signal_cas_acquire APIs enforce correct memory
+ * ordering.
+ *
+ * Test Description:
+ * 1) Create several signals and store the handles in an array,
+ *    denoted by x[]. All the signal values should be initialized to 2.
+ * 2) Create a control signal, denoted by y, initialized to 1.
+ * 3) Start one thread that
+ *    a) Checks the value of y in a loop using hsa_signal_cas_acquire using
+ *       0 as the exchange value.
+ *    b) When the value of y is 1 the exchange succeeds, the thread stops looping, and
+ *    c) Checks all of the x signal values with the signal_cas_relaxed
+ *       API, expecting a value of 2, and replacing it with a value of 1.
+ *    d) Sets the value of y to 2, using the signal_cas_release API.
+ *    e) Starts over.
+ * 4) Start a second thread that does exactly the same set of operations,
+ *    but uses 1 in place of 2 and 2 in place of 1.
+ * 5) Let both threads run for millions of iterations.
+ *
+ * Expected Results: For each cycle, the reported x values for the first thread should be 2
+ * and the reported x values for the second thread should be 1, i.e. all memory operations
+ * that occurred in thread one should be appropriately ordered in thread two after
+ * the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+
+// Control signal (y) and data signals (x). Kept file-local so that the
+// identically named globals of the sibling ordering tests cannot collide
+// with, or silently alias, these when the tests are linked together.
+static hsa_signal_t y;
+static hsa_signal_t x[NUM_X];
+
+// test func one:
+// check y, if y equal to 1, set y to 0, and then check if all x values equal to 2 and set them to 1, and then set y value to 2
+void test_signal_cas_acquire_release_t1(void *data) {
+    int ii, jj;
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) {
+        // change y to 0 if y equals to 1
+        while (hsa_signal_cas_acquire(y, 1, 0) != 1);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should equal to 2 and change to 1
+            hsa_signal_value_t sig_val_t1 = hsa_signal_cas_relaxed(x[ii], 2, 1);
+            ASSERT(sig_val_t1 == 2);
+        }
+
+        // change y to 2
+        hsa_signal_cas_release(y, 0, 2);
+    }
+    return;
+}
+
+
+// test func two:
+// check y, if y equal to 2, set y to 0, and then check if all x values equal to 1 and set them to 2, and then set y value to 1
+void test_signal_cas_acquire_release_t2(void *data) {
+    int ii, jj;
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj ) {
+        // change y to 0 if y equals to 2
+        while (hsa_signal_cas_acquire(y, 2, 0) != 2);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should equal to 1 and change to 2
+            hsa_signal_value_t sig_val_t2 = hsa_signal_cas_relaxed(x[ii], 1, 2);
+            ASSERT(sig_val_t2 == 1);
+        }
+
+        // change y to 1
+        hsa_signal_cas_release(y, 0, 1);
+    }
+    return;
+}
+
+// Test entry point: creates the x signals (value 2) and y (value 1), runs
+// the two test funcs in a two-entry test group and destroys the signals.
+// Returns 0 when the end of the test is reached.
+int test_signal_cas_acquire_release_ordering() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < NUM_X; ++ii) {
+        // initialize all x values to 2
+        status = hsa_signal_create(2, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // initialize y to 1
+    status = hsa_signal_create(1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // create test_group; stop here rather than pass NULL to the calls below
+    struct test_group *test = test_group_create(2);
+    ASSERT(NULL != test);
+
+    // add test func one to the test group
+    test_group_add(test, test_signal_cas_acquire_release_t1, NULL, 1);
+
+    // add test func two to the test_group
+    test_group_add(test, test_signal_cas_acquire_release_t2, NULL, 1);
+
+    // create threads for each test
+    test_group_thread_create(test);
+
+    // start test functions
+    test_group_start(test);
+
+    // wait all tests functions finish
+    test_group_wait(test);
+
+    // exit all tests
+    test_group_exit(test);
+
+    // cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < NUM_X; ++ii) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_cas_acquire_release_ordering_transitive.c b/src/core/signals/test_signal_cas_acquire_release_ordering_transitive.c
new file mode 100644
index 0000000..4472cce
--- /dev/null
+++ b/src/core/signals/test_signal_cas_acquire_release_ordering_transitive.c
@@ -0,0 +1,228 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_cas_acquire_release_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_cas_acquire and + * hsa_signal_cas_release APIs enforce transitive memory + * ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to 2. + * 2) Create two control signals, denoted by y and z. 
The initial + * y signal value should be 1 and the z signal value should be 1. + * 3) Start one thread that + * a) Attempts to modify the value of y in a loop using signal_cas_acquire, + * using 0 as the exchange value and 1 as the condition. + * b) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 2. + * c) Changes all of the x signal values to 1 using the + * signal_store_relaxed API. + * d) Sets the value of y to 2, using the signal_cas_release API, with + * 0 as the condition. + * e) Starts over. + * f) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 1 in place of 2 and 2 in place of 1, and operates on signal z + * instead of signal y. + * 5) Start a third thread that + * a) Waits until y is 2 using signal_wait_acquire. + * b) Sets the value of z to 2 using signal_cas_release with + * 1 as the condition. + * c) Waits until z is 1 using signal_wait_acquire. + * d) Sets the value of y to 1 using signal_cas_release with + * 2 as the condition. + * e) Starts over. + * f) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 6) Let all three threads run for 32K iterations. + * + * Expected Results: For each cycle all memory operations that occurred in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; + +void test_signal_cas_acquire_release_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // loop until y = 1 or y = -1 + while ((y_val = hsa_signal_cas_acquire(y, 1, 0)) != 1) + if (y_val == -1) return; + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal to 0 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == 2, "signal value should be 2, but is %d\n", sig_val_t1); + + // change x value to 1 + hsa_signal_store_relaxed(x[ii], 1); + } + // set y to 2 + hsa_signal_cas_release(y, 0, 2); + } + return; +} + + +void test_signal_cas_acquire_release_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // loop until z = 2 or z = -1 + while ((z_val = hsa_signal_cas_acquire(z, 2, 0)) != 2) + if (z_val == -1) return; + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal to 1 + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == 1, "signal value should be 1, but is %d\n", sig_val_t2); + + // change x value to 1 + hsa_signal_store_relaxed(x[ii], 2); + } + // set z to 1 + hsa_signal_cas_release(z, 0, 1); + } + return; +} + +void test_signal_cas_acquire_release_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // wait until y = 1 + hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, 2, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set z to 0 + hsa_signal_cas_release(z, 1, 2); + + // loop until z = 1 + hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set y to 0 + hsa_signal_cas_release(y, 2, 1); + } + + // set y to 2 until t1 finish to avoid deadlock + while (hsa_signal_load_relaxed(y) != 2); + hsa_signal_store_release(y, -1); + hsa_signal_store_release(z, -1); + return; +} + +int 
test_signal_cas_acquire_release_ordering_transitive() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize all x values to 2 + status = hsa_signal_create(2, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y to 0 + status = hsa_signal_create(1, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // initialize z to 1 + status = hsa_signal_create(1, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(3); + + // add test func one to the test group + test_group_add(test, test_signal_cas_acquire_release_ordering_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_cas_acquire_release_ordering_t2, NULL, 1); + + // add test func three to the test_group + test_group_add(test, test_signal_cas_acquire_release_ordering_t3, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait all tests functions finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_signal_destroy(z); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_cas_atomic.c b/src/core/signals/test_signal_cas_atomic.c new file mode 100644 index 0000000..4faf9ac --- /dev/null +++ b/src/core/signals/test_signal_cas_atomic.c @@ -0,0 +1,466 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/** + * + * Test Name: signal_cas_atomic + * + * Purpose: + * Verify the atomicity of signal compare-and-swap operations + * + * Description: + * + * 1) Init HsaRt, create a new signal, and initialize it to 0. + * Create 4 threads, that + * T1 call hsa_signal_cas_acquire, compare with 0 and set to 1 + * T2 call hsa_signal_cas_acquire, compare with 0 and set to 2 + * T3 call hsa_signal_cas_acquire, compare with 0 and set to 3 + * T4 call hsa_signal_cas_acquire, compare with 0 and set to 4 + * Check that exactly one thread returns 0, then load the signal value and + * check that it equals the value set by that thread. + * + * 2) Init HsaRt, create a new signal, and initialize it to 0. + * Create 4 threads, that + * T1 call hsa_signal_cas_release, compare with 0 and set to 1 + * T2 call hsa_signal_cas_release, compare with 0 and set to 2 + * T3 call hsa_signal_cas_release, compare with 0 and set to 3 + * T4 call hsa_signal_cas_release, compare with 0 and set to 4 + * Check that exactly one thread returns 0, then load the signal value and + * check that it equals the value set by that thread. + * + * 3) Init HsaRt, create a new signal, and initialize it to 0. + * Create 4 threads, that + * T1 call hsa_signal_cas_relaxed, compare with 0 and set to 1 + * T2 call hsa_signal_cas_relaxed, compare with 0 and set to 2 + * T3 call hsa_signal_cas_relaxed, compare with 0 and set to 3 + * T4 call hsa_signal_cas_relaxed, compare with 0 and set to 4 + * Check that exactly one thread returns 0, then load the signal value and + * check that it equals the value set by that thread. + * + * 4) Init HsaRt, create a new signal, and initialize it to 0. 
+ * Create 4 threads, that + * T1 call hsa_signal_cas_acq_rel, compare with 0 and set to 1 + * T2 call hsa_signal_cas_acq_rel, compare with 0 and set to 2 + * T3 call hsa_signal_cas_acq_rel, compare with 0 and set to 3 + * T4 call hsa_signal_cas_acq_rel, compare with 0 and set to 4 + * Check if only one thread returns 0 and load signal value check + * if the value equal to what is set by corresponding thread. + * + */ + +#include +#include +#include +#include +#include "config.h" + +typedef struct test_group test_group; + +// Define a structure to pass parameter to child function +typedef struct { + volatile hsa_signal_t signal_handle; + volatile int num; + volatile int* retval; +} param; + +static void child_func_acquire(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + param* param_ptr = (param*)data; + hsa_signal_t signal_handle = param_ptr->signal_handle; + int num = param_ptr->num; + + *(param_ptr->retval) = hsa_signal_cas_acquire(signal_handle, 0, num+1); + + return; +} + +static void child_func_release(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + param* param_ptr = (param*)data; + hsa_signal_t signal_handle = param_ptr->signal_handle; + int num = param_ptr->num; + + *(param_ptr->retval) = hsa_signal_cas_release(signal_handle, 0, num+1); + + return; +} + +static void child_func_relaxed(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + param* param_ptr = (param*)data; + hsa_signal_t signal_handle = param_ptr->signal_handle; + int num = param_ptr->num; + + *(param_ptr->retval) = hsa_signal_cas_relaxed(signal_handle, 0, num+1); + + return; +} + +static void 
child_func_acq_rel(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + param* param_ptr = (param*)data; + hsa_signal_t signal_handle = param_ptr->signal_handle; + int num = param_ptr->num; + + *(param_ptr->retval) = hsa_signal_cas_acq_rel(signal_handle, 0, num+1); + + return; +} + +/** + * + * @Brief: + * Implement Description #1 + * + * @Return: + * int + * + */ + +int test_signal_cas_atomic_acquire() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + // Set all bits of initial value to 0 + hsa_signal_value_t initial_value = 0; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of 4 + test_group* group_ptr = NULL; + group_ptr = test_group_create(4); + ASSERT(NULL != group_ptr); + int ii; + int jj; + // Set parameter structure for each thread + volatile int retval[4]; + for (ii = 0; ii < 4; ++ii) + retval[ii] = 1000; // set to none 1, none 2, none 3, none 4 + param* param_ptr = (param*)malloc(sizeof(param)*4); + for (ii = 0; ii < 4; ++ii) { + (param_ptr+ii)->signal_handle = signal_handle; + (param_ptr+ii)->num = ii; + (param_ptr+ii)->retval = retval+ii; + } + // Add tests + for (ii = 0; ii < 4; ++ii) { + test_group_add(group_ptr, child_func_acquire, param_ptr+ii, 1); + } + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running for OP_COUNT times + for (ii = 0; ii < OP_COUNT; ++ii) { + test_group_start(group_ptr); + test_group_wait(group_ptr); + // Here, check if only on thread return 0 when calling cas API + int num_zero = 0, index; + for (jj = 0; jj < 4; ++jj) { + if (retval[jj] == 0) { + num_zero++; + index = jj+1; + } + } + ASSERT_MSG(1 == num_zero, "Only one zero 
should be observed!\n"); + // Load signal value, check if value equal to index + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG(index == loaded_value, "Failed to perform CAS successfully!\n"); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return: + * int + * + */ + +int test_signal_cas_atomic_release() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + // Set all bits of initial value to 0 + hsa_signal_value_t initial_value = 0; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of 4 + test_group* group_ptr = NULL; + group_ptr = test_group_create(4); + ASSERT(NULL != group_ptr); + int ii; + int jj; + // Set parameter structure for each thread + volatile int retval[4]; + for (ii = 0; ii < 4; ++ii) + retval[ii] = 1000; // set to none 1, none 2, none 3, none 4 + param* param_ptr = (param*)malloc(sizeof(param)*4); + for (ii = 0; ii < 4; ++ii) { + (param_ptr+ii)->signal_handle = signal_handle; + (param_ptr+ii)->num = ii; + (param_ptr+ii)->retval = retval+ii; + } + // Add tests + for (ii = 0; ii < 4; ++ii) { + test_group_add(group_ptr, child_func_release, param_ptr+ii, 1); + } + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running for OP_COUNT times + for (ii = 0; ii < OP_COUNT; ++ii) { + test_group_start(group_ptr); + test_group_wait(group_ptr); + // Here, check if only on thread return 0 when calling cas API + int 
num_zero = 0, index; + for (jj = 0; jj < 4; ++jj) { + if (retval[jj] == 0) { + num_zero++; + index = jj+1; + } + } + ASSERT_MSG(1 == num_zero, "Only one zero should be observed!\n"); + // Load signal value, check if value equal to index + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG(index == loaded_value, "Failed to perform CAS successfully!\n"); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #3 + * + * @Return: + * int + * + */ + +int test_signal_cas_atomic_relaxed() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + // Set all bits of initial value to 0 + hsa_signal_value_t initial_value = 0; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of 4 + test_group* group_ptr = NULL; + group_ptr = test_group_create(4); + ASSERT(NULL != group_ptr); + int ii; + int jj; + // Set parameter structure for each thread + volatile int retval[4]; + for (ii = 0; ii < 4; ++ii) + retval[ii] = 1000; + param* param_ptr = (param*)malloc(sizeof(param)*4); + for (ii = 0; ii < 4; ++ii) { + (param_ptr+ii)->signal_handle = signal_handle; + (param_ptr+ii)->num = ii; + (param_ptr+ii)->retval = retval+ii; + } + // Add tests + for (ii = 0; ii < 4; ++ii) { + test_group_add(group_ptr, child_func_relaxed, param_ptr+ii, 1); + } + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running for OP_COUNT times + for (ii = 0; ii < OP_COUNT; ++ii) { + 
test_group_start(group_ptr); + test_group_wait(group_ptr); + // Here, check if only on thread return 0 when calling cas API + int num_zero = 0, index; + for (jj = 0; jj < 4; ++jj) { + if (retval[jj] == 0) { + num_zero++; + index = jj+1; + } + } + ASSERT_MSG(1 == num_zero, "Only one zero should be observed!\n"); + // Load signal value, check if value equal to index + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG(index == loaded_value, "Failed to perform CAS successfully!\n"); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #4 + * + * @Return: + * int + * + */ + +int test_signal_cas_atomic_acq_rel() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + // Set all bits of initial value to 0 + hsa_signal_value_t initial_value = 0; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of 4 + test_group* group_ptr = NULL; + group_ptr = test_group_create(4); + ASSERT(NULL != group_ptr); + int ii; + int jj; + // Set parameter structure for each thread + volatile int retval[4]; + for (ii = 0; ii < 4; ++ii) + retval[ii] = 1000; + param* param_ptr = (param*)malloc(sizeof(param)*4); + for (ii = 0; ii < 4; ++ii) { + (param_ptr+ii)->signal_handle = signal_handle; + (param_ptr+ii)->num = ii; + (param_ptr+ii)->retval = retval+ii; + } + // Add tests + for (ii = 0; ii < 4; ++ii) { + test_group_add(group_ptr, child_func_acq_rel, param_ptr+ii, 1); + } + // Create threads for tests + 
test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running for OP_COUNT times + for (ii = 0; ii < OP_COUNT; ++ii) { + test_group_start(group_ptr); + test_group_wait(group_ptr); + // Here, check if only on thread return 0 when calling cas API + int num_zero = 0, index; + for (jj = 0; jj < 4; ++jj) { + if (retval[jj] == 0) { + num_zero++; + index = jj+1; + } + } + ASSERT_MSG(1 == num_zero, "Only one zero should be observed!\n"); + // Load signal value, check if value equal to index + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG(index == loaded_value, "Failed to perform CAS successfully!\n"); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_create_concurrent.c b/src/core/signals/test_signal_create_concurrent.c new file mode 100644 index 0000000..d805175 --- /dev/null +++ b/src/core/signals/test_signal_create_concurrent.c @@ -0,0 +1,180 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_create_concurrent + * Scope: Conformance + * + * Purpose: Verifies that signals can be created concurrently in different + * threads. + * + * Test Description: + * 1) Start N threads that each + * a) Create M signals, that are maintained in a global list. + * b) When creating the signals specify all agents as consumers. + * 2) After the signals have been created, have each agent wait on + * each of the signals. 
All agents should wait on a signal concurrently + * and all signals in the signal list should be waited on one at a time. + * 3) Set the signal values in another thread so the waiting agents wake + * up, as expected. + * 4) Destroy all of the signals in the main thread. + * + * Expected Results: All of the signals should be created successfully. + * All + * agents should be able to wait on all of the N*M threads successfully. + */ + +#include +#include +#include +#include +#include + +hsa_signal_t *signals; + +#define INI_VAL 0 +#define CMP_VAL 1 + +#define N 8 +#define M 32 + +void signal_create_func(void *data) { + hsa_status_t status; + int offset = (*(int *)data); + int ii; + const char *err_str; + for (ii = 0; ii < M; ++ii) { + status = hsa_signal_create(INI_VAL, 0, NULL, &signals[offset + ii]); + ASSERT_MSG(status == HSA_STATUS_SUCCESS, "\nErr_code: %d Err_string: %s\n", status, err_str); + } + return; +} + +void signals_wait_host_func(void *data) { + int ii; + hsa_agent_t *agent = (hsa_agent_t *)data; + for (ii = 0; ii < M*N; ++ii) { + hsa_signal_wait_acquire(signals[ii], HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + } + return; +} + +void signals_wait_component_func(void *data) { + int ii; + hsa_agent_t *agent = (hsa_agent_t *)data; + for (ii = 0; ii < M*N; ++ii) { + // Launch a kernel with signal_wait_func + hsa_signal_wait_acquire(signals[ii], HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + } + return; +} + +int test_signal_create_concurrent() { + int ii; + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + signals = (hsa_signal_t *)malloc(sizeof(hsa_signal_t) * N * M); + + struct test_group *tg_sg_create = test_group_create(N); + int *offset = (int *) malloc(sizeof(int) * N); + + for (ii = 0; ii < N; ++ii) { + offset[ii] = ii * M; + test_group_add(tg_sg_create, &signal_create_func, offset + ii, 1); + } + + test_group_thread_create(tg_sg_create); + 
test_group_start(tg_sg_create); + test_group_wait(tg_sg_create); + test_group_exit(tg_sg_create); + test_group_destroy(tg_sg_create); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + struct test_group *tg_sg_wait = test_group_create(agent_list.num_agents); + for (ii = 0; ii < agent_list.num_agents; ++ii) { + hsa_device_type_t device_type; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEVICE, &device_type); + ASSERT(status == HSA_STATUS_SUCCESS); + if (device_type == HSA_DEVICE_TYPE_CPU) { + test_group_add(tg_sg_wait, &signals_wait_host_func, &(agent_list.agents[ii]), 1); + } else if (device_type == HSA_DEVICE_TYPE_GPU) { + test_group_add(tg_sg_wait, &signals_wait_component_func, &(agent_list.agents[ii]), 1); + } else if (device_type == HSA_DEVICE_TYPE_DSP) { + ASSERT_MSG(1, "ERROR: DSP_AGENT NOT SUPPORTED\n"); + } else { + ASSERT_MSG(1, "ERROR: UNKNOWN DEVICE\n"); + } + } + + test_group_thread_create(tg_sg_wait); + test_group_start(tg_sg_wait); + + for (ii = 0; ii < N*M; ++ii) { + hsa_signal_store_relaxed(signals[ii], CMP_VAL); + } + test_group_wait(tg_sg_wait); + test_group_exit(tg_sg_wait); + test_group_destroy(tg_sg_wait); + + + for (ii = 0; ii < N*M; ++ii) { + status = hsa_signal_destroy(signals[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free_agent_list(&agent_list); + + free(signals); + free(offset); + + return 0; +} diff --git a/src/core/signals/test_signal_create_initial_value.c b/src/core/signals/test_signal_create_initial_value.c new file mode 100644 index 0000000..f4d7e64 --- /dev/null +++ b/src/core/signals/test_signal_create_initial_value.c @@ -0,0 +1,107 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License 
(NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_create_initial_value + * Scope: Conformance + * + * Purpose: Verifies that when signals are created the initial value + * specified in the API sets the signals value. 
+ * + * Test Description: + * 1) Create a signal, specifying a positive initial signal value. + * 2) Query the value with an appropriate hsa_signal_load API call. + * 3) Repeat this using a signal value of 0. + * 4) Repeat using a signal value that is negative. + * + * Expected Results: All of the signals should be created successfully, + * and the initial value should be properly set. + */ + +#include +#include + +int test_signal_create_initial_value() { + hsa_status_t status; + hsa_signal_t signal; + hsa_signal_value_t signal_value; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create a signal with initial signal value 1 + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + // load signal value, and check if the return value equals to 1 + signal_value = hsa_signal_load_acquire(signal); + ASSERT(signal_value == 1); + + status = hsa_signal_destroy(signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create a signal with initial signal value 0 + status = hsa_signal_create(0, 0, NULL, &signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + // load signal value, and check if the return value equals to 0 + signal_value = hsa_signal_load_acquire(signal); + ASSERT(signal_value == 0); + + status = hsa_signal_destroy(signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create a signal with initial signal value -1 + status = hsa_signal_create(-1, 0, NULL, &signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + // load signal value, and check if the return value equals to -1 + signal_value = hsa_signal_load_acquire(signal); + ASSERT(signal_value == -1); + + status = hsa_signal_destroy(signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/signals/test_signal_create_max_consumers.c b/src/core/signals/test_signal_create_max_consumers.c new file mode 100644 index 0000000..479c7d6 --- /dev/null +++ b/src/core/signals/test_signal_create_max_consumers.c @@ -0,0 +1,137 @@ +/* + * 
============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: signal_create_max_consumers + * Scope: Conformance + * + * Purpose: Verifies that when a signal is created with the num_consumers + * parameter set to the total number of agents and a consumers list + * that contains all agents, the signal can be waited on by all agent_list. + * + * Test Description: + * 1) Create a signal using the following parameters, + * a) A num_consumers value equal to the total number + * of agents on the system. + * b) A consumers list containing all of the agents + * in the system. + * 2) After the signal is created, have all of the agents in + * the system wait on the signal one at a time, + * either using the appropriate hsa_signal_wait API or a + * HSAIL instruction executed in a kernel. + * 3) Set the signal on another thread such that the waiting + * threads wait condition is satisfied. + * + * Expected Results: All of the agents should be able to properly wait + * on the signal. + */ + +#include +#include +#include +#include + +#define INI_VAL 0 +#define CMP_VAL 1 + +static void signal_wait_host_func(void *data) { + hsa_signal_t *signal_ptr = (hsa_signal_t*) data; + hsa_signal_wait_acquire(*signal_ptr, HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + return; +} + +static void signal_wait_component_func(void *data) { + hsa_signal_t *signal_ptr = (hsa_signal_t*) data; + hsa_signal_wait_acquire(*signal_ptr, HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + return; +} + +int test_signal_create_max_consumers() { + int ii; + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + hsa_signal_t signal; + status = hsa_signal_create(INI_VAL, agent_list.num_agents, agent_list.agents, &signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct test_group *tg_sg_wait = test_group_create(agent_list.num_agents); + for (ii = 0; ii < agent_list.num_agents; ++ii) { + 
hsa_device_type_t device_type; + hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEVICE, &device_type); + if (device_type == HSA_DEVICE_TYPE_CPU) { + test_group_add(tg_sg_wait, &signal_wait_host_func, &signal, 1); + } else if (device_type == HSA_DEVICE_TYPE_GPU) { + test_group_add(tg_sg_wait, &signal_wait_component_func, &signal, 1); + } else if (device_type == HSA_DEVICE_TYPE_DSP) { + ASSERT_MSG(1, "ERROR: DSP_AGENT NOT SUPPORTED\n"); + } else { + ASSERT_MSG(1, "ERROR: UNKOWN DEIVCE TYPE"); + } + } + + test_group_thread_create(tg_sg_wait); + test_group_start(tg_sg_wait); + + hsa_signal_store_relaxed(signal, CMP_VAL); + + test_group_wait(tg_sg_wait); + test_group_exit(tg_sg_wait); + test_group_destroy(tg_sg_wait); + + free_agent_list(&agent_list); + + status = hsa_signal_destroy(signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/signals/test_signal_create_one_consumers.c b/src/core/signals/test_signal_create_one_consumers.c new file mode 100644 index 0000000..075169a --- /dev/null +++ b/src/core/signals/test_signal_create_one_consumers.c @@ -0,0 +1,136 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_create_one_consumers + * Scope: Conformance + * + * Purpose: Verifies that when a signal is created with the num_consumers + * parameter set to 1 that the consumers array is used to create + * a signal that can be consumed by that agent. + * + * Test Description: + * 1) Create a signal using the following parameters, + * a) A num_consumers value of 1. + * b) A consumers list containing 1 agent. 
+ * 2) After the signal is created, have the specified agent + * wait on the signal either using the appropriate hsa_signal_wait + * API or a HSAIL instruction executed in a kernel. + * 3) Set the signal on another thread such that the waiting + * threads wait condition is satisfied. + * 4) Repeat for all agents in the system. + * + * Expected Results: All of the agents should be able to properly wait + * on the signal create for them. + */ + +#include +#include +#include +#include + +#define CMP_VAL 1 +#define INI_VAL 0 + +static void signal_wait_host_func(void *data) { + hsa_signal_t* signal_ptr = (hsa_signal_t*) data; + hsa_signal_wait_acquire(*signal_ptr, HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + return; +} + +static void signal_wait_component_func(void *data) { + hsa_signal_t* signal_ptr = (hsa_signal_t*) data; + hsa_signal_wait_acquire(*signal_ptr, HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + return; +} + +int test_signal_create_one_consumers() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + hsa_signal_t signal; + hsa_signal_create(INI_VAL, 1, &(agent_list.agents[0]), &signal); + + struct test_group *tg_sg_wait = test_group_create(agent_list.num_agents); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ii++) { + hsa_device_type_t device_type; + hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEVICE, &device_type); + if (device_type == HSA_DEVICE_TYPE_CPU) { + test_group_add(tg_sg_wait, &signal_wait_host_func, &signal, 1); + } else if (device_type == HSA_DEVICE_TYPE_GPU) { + test_group_add(tg_sg_wait, &signal_wait_component_func, &signal, 1); + } else if (device_type == HSA_DEVICE_TYPE_DSP) { + ASSERT_MSG(1, "ERROR: DSP_AGENT NOT SUPPORTED\n"); + } else { + ASSERT_MSG(1, "ERROR: UNKOWN DEIVCE TYPE"); + } + } + + test_group_thread_create(tg_sg_wait); + 
test_group_start(tg_sg_wait); + + hsa_signal_store_relaxed(signal, CMP_VAL); + + test_group_wait(tg_sg_wait); + test_group_exit(tg_sg_wait); + test_group_destroy(tg_sg_wait); + + free_agent_list(&agent_list); + + status = hsa_signal_destroy(signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} + diff --git a/src/core/signals/test_signal_create_zero_consumers.c b/src/core/signals/test_signal_create_zero_consumers.c new file mode 100644 index 0000000..9f02004 --- /dev/null +++ b/src/core/signals/test_signal_create_zero_consumers.c @@ -0,0 +1,136 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_create_zero_consumers + * Scope: Conformance + * + * Purpose: Verifies that when a signal is created with the num_consumers + * parameter set to 0 that the consumers array is ignored and that all + * agents become signal consumers by default. + * + * Test Description: + * 1) Create a signal using the following parameters, + * a) A num_consumers value of 0. + * b) A consumers list containing 1 agent (the component) + * 2) After the signal is created, have all of the agents in + * the system wait on the signal one at a time, + * either using the appropriate hsa_signal_wait API or a + * HSAIL instruction executed in a kernel. + * 3) Set the signal on another thread such that the waiting + * threads wait condition is satisfied. + * + * Expected Results: All of the agents should be able to properly wait + * on the signal. 
+ */ + +#include +#include +#include +#include + +#define CMP_VAL 1 +#define INI_VAL 0 + +static void signal_wait_host_func(void *data) { + hsa_signal_t* signal_ptr = (hsa_signal_t*) data; + hsa_signal_wait_acquire(*signal_ptr, HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + return; +} + +static void signal_wait_component_func(void *data) { + hsa_signal_t* signal_ptr = (hsa_signal_t*) data; + hsa_signal_wait_acquire(*signal_ptr, HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + return; +} + +int test_signal_create_zero_consumers() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + hsa_signal_t signal; + hsa_signal_create(INI_VAL, 0, &(agent_list.agents[0]), &signal); + + struct test_group *tg_sg_wait = test_group_create(agent_list.num_agents); + + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + hsa_device_type_t device_type; + hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_DEVICE, &device_type); + if (device_type == HSA_DEVICE_TYPE_CPU) { + test_group_add(tg_sg_wait, &signal_wait_host_func, &signal, 1); + } else if (device_type == HSA_DEVICE_TYPE_GPU) { + test_group_add(tg_sg_wait, &signal_wait_component_func, &signal, 1); + } else if (device_type == HSA_DEVICE_TYPE_DSP) { + ASSERT_MSG(1, "ERROR: DSP_AGENT NOT SUPPORTED\n"); + } else { + ASSERT_MSG(1, "ERROR: UNKOWN DEIVCE TYPE"); + } + } + + test_group_thread_create(tg_sg_wait); + test_group_start(tg_sg_wait); + + hsa_signal_store_relaxed(signal, CMP_VAL); + + test_group_wait(tg_sg_wait); + test_group_exit(tg_sg_wait); + test_group_destroy(tg_sg_wait); + + free_agent_list(&agent_list); + + status = hsa_signal_destroy(signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} + diff --git a/src/core/signals/test_signal_destroy_concurrent.c 
b/src/core/signals/test_signal_destroy_concurrent.c new file mode 100644 index 0000000..d894161 --- /dev/null +++ b/src/core/signals/test_signal_destroy_concurrent.c @@ -0,0 +1,131 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_destroy_concurrent + * Scope: Conformance + * + * Purpose: Verifies that signals can be destroyed concurrently in different + * threads. + * + * Test Description: + * 1) Create N*M signals in the main thread, maintained in a global list. + * When creating the signals specify all agents as consumers. + * 2) Start N threads that each destroy M of the signals, every thread + * operating on its own disjoint slice of the global list. + * 3) Wait for all of the threads to finish. + * + * Expected Results: All of the signals should be created successfully, + * and all of the N*M signals should be destroyed successfully by the threads.
+ */ + +#include +#include +#include +#include +#include + +hsa_signal_t *signals; + +#define INI_VAL 0 + +#define N 8 +#define M 32 + +void signal_destroy_func(void *data) { + hsa_status_t status; + int offset = (*(int *)data); + int ii; + const char *err_str; + for (ii = 0; ii < M; ii++) { + status = hsa_signal_destroy(signals[offset + ii]); + ASSERT_MSG(status == HSA_STATUS_SUCCESS, "\nErr_code: %d Err_string: %s\n", status, err_str); + } + return; +} + +int test_signal_destroy_concurrent() { + int ii; + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + signals = (hsa_signal_t *)malloc(sizeof(hsa_signal_t) * N * M); + + struct test_group *tg_sg_destroy = test_group_create(N); + int *offset = (int *)malloc(sizeof(int) * N); + + for (ii = 0; ii < N; ++ii) { + int jj; + offset[ii] = ii * M; + for (jj = 0; jj < M; ++jj) { + status = hsa_signal_create(INI_VAL, 0, NULL, &signals[ii * M + jj]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + } + + for (ii = 0; ii < N; ++ii) { + test_group_add(tg_sg_destroy, &signal_destroy_func, offset + ii, 1); + } + + test_group_thread_create(tg_sg_destroy); + test_group_start(tg_sg_destroy); + test_group_wait(tg_sg_destroy); + test_group_exit(tg_sg_destroy); + test_group_destroy(tg_sg_destroy); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + free(signals); + free(offset); + + return 0; +} diff --git a/src/core/signals/test_signal_exchange_acq_rel_ordering.c b/src/core/signals/test_signal_exchange_acq_rel_ordering.c new file mode 100644 index 0000000..e4e14e9 --- /dev/null +++ b/src/core/signals/test_signal_exchange_acq_rel_ordering.c @@ -0,0 +1,179 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro 
Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_exchange_acq_rel_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_exchange_acq_rel + * API enforces correct memory ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. 
All the signal values should be initialized + * to 2. + * 2) Create a control signal, denoted by y, initialized + * to 1. + * 3) Start one thread that + * a) Checks the value of y in a loop using signal_exchange_acq_rel using + * 0 as the exchange value. + * b) When the value of y is 1, the thread stops looping, and + * c) Checks all of the x signal values with the signal_exchange_relaxed + * API, expecting a value of 2, and replacing it with a value of 1. + * d) Sets the value of y to 2, using the signal_exchange_acq_rel API. + * e) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 1 in place of 2 and 2 in place of 1. + * 5) Let both threads run for millions of iterations. + * + * Expected Results: For each cycle, the reported x values for the first thread should be 2 + * and the reported x values for the second thread should be 1, i.e. all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa.
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t x[NUM_X]; + +// test func one: +// check y, if y equal to 1, set y to 0, and then check if all x values equal to 2 and set them to 1, and then set y value to 2 +void test_signal_exchange_acq_rel_t1(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // change y to 0 if y equals to 1 + while (hsa_signal_load_relaxed(y) != 1); + hsa_signal_exchange_acq_rel(y, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal to 2 and change to 1 + hsa_signal_value_t sig_val_t1 = hsa_signal_exchange_relaxed(x[ii], 1); + ASSERT(sig_val_t1 == 2); + } + // change y to 2 + hsa_signal_exchange_acq_rel(y, 2); + } + return; +} + + +// test func two: +// check y, if y equal to 2, set y to 0, and then check if all x values equal to 1 and set them to 2, and then set y value to 1 +void test_signal_exchange_acq_rel_t2(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // change y to 0 if y equals to 2 + while (hsa_signal_load_relaxed(y) != 2); + hsa_signal_exchange_acq_rel(y, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal to 1 and change to 2 + hsa_signal_value_t sig_val_t2 = hsa_signal_exchange_relaxed(x[ii], 2); + ASSERT(sig_val_t2 == 1); + } + // change y to 1 + hsa_signal_exchange_acq_rel(y, 1); + } + return; +} + +int test_signal_exchange_acq_rel_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize all x values to 2 + status = hsa_signal_create(2, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y to 1 + status = hsa_signal_create(1, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(2); + + // add test func one to the test 
group + test_group_add(test, test_signal_exchange_acq_rel_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_exchange_acq_rel_t2, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait all tests functions finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_exchange_acq_rel_ordering_transitive.c b/src/core/signals/test_signal_exchange_acq_rel_ordering_transitive.c new file mode 100644 index 0000000..10151cc --- /dev/null +++ b/src/core/signals/test_signal_exchange_acq_rel_ordering_transitive.c @@ -0,0 +1,234 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_exchange_acq_rel_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_exchange_acquire and + * hsa_signal_exchange_release APIs enforce transitive memory + * ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to 2. + * 2) Create two control signals, denoted by y and z. 
The initial + * y signal value should be 1 and the z signal value should be 0. + * 3) Start one thread that + * a) Checks the value of y in a loop using signal_exchange_acq_rel + * and using 0 as the exchange value. + * b) When the value of y is 1, the thread stops looping and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 2. + * d) Changes all of the x signal values to 1 using the + * signal_exchange_relaxed API. + * e) Sets the value of y to 2, using the signal_exchange_acq_rel API. + * f) Starts over. + * g) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 1 in place of 2 and 2 in place of 1, and operates on signal z + * instead of signal y. + * 5) Start a third thread that + * a) Waits until the value of y is 2 using signal_wait_acquire. + * b) Sets the value of z to 2 using signal_exchange_acq_rel. + * c) Waits until the value of z is 1 using signal_wait_acquire. + * d) Sets the value of y to 1 using signal_exchange_acq_rel. + * e) Starts over. + * f) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 6) Let the threads run for 32K iterations. + * + * Expected Results: For each cycle all memory operations that occurred in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+
+// Control signals: y hands the x[] array to thread one, z hands it to thread two.
+hsa_signal_t y;
+hsa_signal_t z;
+// Data signals whose values are checked and flipped by threads one and two.
+hsa_signal_t x[NUM_X];
+
+/*
+ * Thread one.
+ *
+ * Spins until y == 1 (or returns when y == -1), claims the turn by exchanging
+ * 0 into y, verifies that every x[] value is 2, rewrites each to 1 and then
+ * publishes the result by exchanging 2 into y.
+ *
+ * data: unused.
+ */
+void test_signal_exchange_acq_rel_ordering_t1(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t y_val = 0;
+        // spin until y == 1; y == -1 is the termination request from thread three
+        while ((y_val = hsa_signal_load_acquire(y)) != 1)
+            if (y_val == -1) return;
+
+        // claim the turn: y becomes 0 while x[] is being processed
+        hsa_signal_exchange_acq_rel(y, 0);
+
+        for (ii = 0; ii < NUM_X; ii++) {
+            // every x value should be 2 (initial value, or written by thread two)
+            hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]);
+            // hsa_signal_value_t may be 64 bits wide, so print it as long long
+            ASSERT_MSG(sig_val_t1 == 2, "signal value should be 2, but is %lld\n", (long long)sig_val_t1);
+
+            // change x value to 1
+            hsa_signal_store_relaxed(x[ii], 1);
+        }
+
+        // set y to 2 so that thread three can pass the turn to thread two
+        hsa_signal_exchange_acq_rel(y, 2);
+    }
+}
+
+
+/*
+ * Thread two.
+ *
+ * Mirror image of thread one on signal z: spins until z == 2 (or returns when
+ * z == -1), exchanges 0 into z, verifies that every x[] value is 1, rewrites
+ * each to 2 and then exchanges 1 into z.
+ *
+ * data: unused.
+ */
+void test_signal_exchange_acq_rel_ordering_t2(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t z_val = 0;
+        // spin until z == 2; z == -1 is the termination request from thread three
+        while ((z_val = hsa_signal_load_acquire(z)) != 2)
+            if (z_val == -1) return;
+
+        // claim the turn with the acq_rel exchange, the API under test,
+        // matching what thread one does on y
+        hsa_signal_exchange_acq_rel(z, 0);
+
+        for (ii = 0; ii < NUM_X; ii++) {
+            // every x value should be 1 (written by thread one)
+            hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]);
+            // hsa_signal_value_t may be 64 bits wide, so print it as long long
+            ASSERT_MSG(sig_val_t2 == 1, "signal value should be 1, but is %lld\n", (long long)sig_val_t2);
+
+            // change x value to 2
+            hsa_signal_store_relaxed(x[ii], 2);
+        }
+
+        // set z to 1 so that thread three can pass the turn back to thread one
+        hsa_signal_exchange_acq_rel(z, 1);
+    }
+}
+
+
+/*
+ * Thread three.
+ *
+ * Relays the turn between threads one and two for NUM_ITER_MEM_ORD rounds,
+ * then stores -1 into y and z so that both of them return.
+ *
+ * data: unused.
+ */
+void test_signal_exchange_acq_rel_ordering_t3(void *data) {
+    int ii;
+    for (ii = 0; ii < NUM_ITER_MEM_ORD; ii++) {
+        // wait until y == 2, i.e. thread one has finished its pass
+        hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, 2, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+
+        // set z to 2 to start thread two
+        hsa_signal_exchange_acq_rel(z, 2);
+
+        // wait until z == 1, i.e. thread two has finished its pass
+        hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+
+        // set y to 1 to start thread one again
+        hsa_signal_exchange_acq_rel(y, 1);
+    }
+
+    // the last exchange above started thread one once more; wait until it has
+    // finished that pass (y == 2) before requesting termination
+    while (hsa_signal_load_relaxed(y) != 2);
+    hsa_signal_store_release(y, -1);
+    hsa_signal_store_release(z, -1);
+
+    return;
+}
+
+/*
+ * Test entry point: creates the signals, runs the three threads above in a
+ * test group and releases every resource afterwards.
+ *
+ * Returns 0; failures are reported through ASSERT.
+ */
+int test_signal_exchange_acq_rel_ordering_transitive() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < NUM_X; ii++) {
+        // initialize all x values to 2
+        status = hsa_signal_create(2, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // initialize y to 1, so thread one takes the first turn
+    status = hsa_signal_create(1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // initialize z to 0, so thread two waits for thread three
+    status = hsa_signal_create(0, 0, NULL, &z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // create test_group
+    struct test_group *test = test_group_create(3);
+    ASSERT(test != NULL);
+
+    // add test func one to the test group
+    test_group_add(test, test_signal_exchange_acq_rel_ordering_t1, NULL, 1);
+
+    // add test func two to the test_group
+    test_group_add(test, test_signal_exchange_acq_rel_ordering_t2, NULL, 1);
+
+    // add test func three to the test_group
+    test_group_add(test, test_signal_exchange_acq_rel_ordering_t3, NULL, 1);
+
+    // create threads for each test
+    test_group_thread_create(test);
+
+    // start test functions
+    test_group_start(test);
+
+    // wait until all test functions finish
+    test_group_wait(test);
+
+    // exit all tests
+    test_group_exit(test);
+
+    // cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < NUM_X; ii++) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_signal_destroy(z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_exchange_acquire_release_ordering.c b/src/core/signals/test_signal_exchange_acquire_release_ordering.c
new file mode 100644
index 0000000..9c24d40
--- /dev/null
+++ b/src/core/signals/test_signal_exchange_acquire_release_ordering.c
@@ -0,0 +1,175 @@
+/*
+ * 
============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: signal_exchange_acquire_release_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_exchange_acquire and + * hsa_signal_exchange_release APIs enforce correct memory ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized to 2. + * 2) Create a control signal, denoted by y, initialized to 1. + * 3) Start one thread that + * a) Checks the value of y in a loop using signal_load_relaxed. + * b) When the value of y is 1, the thread stops looping and sets y to 0 + * using the signal_exchange_acquire API. + * c) Replaces all of the x signal values with 1 using the + * signal_exchange_relaxed API, expecting each previous value + * to be 2. + * d) Sets the value of y to 2, using the signal_exchange_release API. + * e) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 1 in place of 2 and 2 in place of 1. + * 5) Let both threads run for millions of iterations. + * + * Expected Results: For each cycle, the reported x values for the first thread should be 2 + * and the reported x values for the second thread should be 1, i.e. all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa. 
+ */
+
+#include
+#include
+#include
+#include "config.h"
+
+// y is the turn-taking control signal; x[] holds the values being handed over.
+hsa_signal_t y;
+hsa_signal_t x[NUM_X];
+
+/*
+ * Worker one.
+ * For NUM_ITER_MEM_ORD rounds: spin until y reads 1, exchange 0 into y,
+ * exchange 1 into every x[] entry while asserting the previous value was 2,
+ * and finally exchange 2 into y.
+ */
+void test_signal_exchange_acquire_release_t1(void *data) {
+    int round = 0;
+    while (round < NUM_ITER_MEM_ORD) {
+        // busy-wait for our turn
+        while (hsa_signal_load_relaxed(y) != 1) {
+        }
+        hsa_signal_exchange_acquire(y, 0);
+
+        int idx = 0;
+        while (idx < NUM_X) {
+            hsa_signal_value_t sig_val_t1 = hsa_signal_exchange_relaxed(x[idx], 1);
+            ASSERT(sig_val_t1 == 2);
+            ++idx;
+        }
+
+        // hand the turn to worker two
+        hsa_signal_exchange_release(y, 2);
+        ++round;
+    }
+}
+
+
+/*
+ * Worker two.
+ * For NUM_ITER_MEM_ORD rounds: spin until y reads 2, exchange 0 into y,
+ * exchange 2 into every x[] entry while asserting the previous value was 1,
+ * and finally exchange 1 into y.
+ */
+void test_signal_exchange_acquire_release_t2(void *data) {
+    int round = 0;
+    while (round < NUM_ITER_MEM_ORD) {
+        // busy-wait for our turn
+        while (hsa_signal_load_relaxed(y) != 2) {
+        }
+        hsa_signal_exchange_acquire(y, 0);
+
+        int idx = 0;
+        while (idx < NUM_X) {
+            hsa_signal_value_t sig_val_t2 = hsa_signal_exchange_relaxed(x[idx], 2);
+            ASSERT(sig_val_t2 == 1);
+            ++idx;
+        }
+
+        // hand the turn back to worker one
+        hsa_signal_exchange_release(y, 1);
+        ++round;
+    }
+}
+
+/*
+ * Test entry point: sets up the runtime and the signals, runs both workers
+ * in a two-member test group, then tears everything down. Returns 0.
+ */
+int test_signal_exchange_acquire_release_ordering() {
+    hsa_status_t status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // every x signal starts at 2
+    int idx;
+    for (idx = 0; idx < NUM_X; idx++) {
+        status = hsa_signal_create(2, 0, NULL, &x[idx]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // y starts at 1
+    status = hsa_signal_create(1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // build the group and register both workers
+    struct test_group *test = test_group_create(2);
+    test_group_add(test, test_signal_exchange_acquire_release_t1, NULL, 1);
+    test_group_add(test, test_signal_exchange_acquire_release_t2, NULL, 1);
+
+    // spawn the threads, release them, and wait for completion
+    test_group_thread_create(test);
+    test_group_start(test);
+    test_group_wait(test);
+
+    // shut the group down and free it
+    test_group_exit(test);
+    test_group_destroy(test);
+
+    // release the signals
+    for (idx = 0; idx < NUM_X; idx++) {
+        status = hsa_signal_destroy(x[idx]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_exchange_acquire_release_ordering_transitive.c b/src/core/signals/test_signal_exchange_acquire_release_ordering_transitive.c
new file mode 100644
index 0000000..3a7bf37
--- /dev/null
+++ b/src/core/signals/test_signal_exchange_acquire_release_ordering_transitive.c
@@ -0,0 +1,234 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_exchange_acquire_release_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_exchange_acquire and + * hsa_signal_exchange_release APIs enforce transitive memory + * ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to 2. + * 2) Create two control signals, denoted by y and z. The initial + * y signal value should be 1 and the z signal value should be 0. + * 3) Start one thread that + * a) Check the value of y in a loop using signal_exchange_acquire + * and using 0 as the exchange value. + * b) When the value of y is 1, the thread stops looping and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 2. + * d) Changes all of the x signal values to 1 using the + * signal_store_relaxed API. + * e) Sets the value of y to 2, using the signal_store_release API. + * f) Starts over. + * g) If it detects that the value of y is -1, it terminates. 
+ * 4) Start a second thread that does exactly the same set of operations,
+ *    but uses 1 in place of 2 and 2 in place of 1, and operates on signal z
+ *    instead of signal y.
+ * 5) Start a third thread that
+ *    a) Waits until y is 2 using signal_wait_acquire.
+ *    b) Sets the value of z to 2 using signal_exchange_release.
+ *    c) Waits until z is 1 using signal_wait_acquire.
+ *    d) Sets the value of y to 1 using signal_exchange_release.
+ *    e) Starts over.
+ *    f) After a set number of iterations the third thread should set
+ *       both y and z signal values to -1 and terminate.
+ * 6) Let all three threads run for 32K iterations.
+ *
+ * Expected Results: For each cycle all memory operations that occurred in thread one should
+ * be appropriately ordered in thread two after the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+
+// Control signals: y hands the x[] array to thread one, z hands it to thread two.
+hsa_signal_t y;
+hsa_signal_t z;
+// Data signals whose values are checked and flipped by threads one and two.
+hsa_signal_t x[NUM_X];
+
+/*
+ * Thread one.
+ *
+ * Spins until y == 1 (or returns when y == -1), claims the turn by exchanging
+ * 0 into y with acquire semantics, verifies that every x[] value is 2,
+ * rewrites each to 1 and then stores 2 into y with release semantics.
+ *
+ * data: unused.
+ */
+void test_signal_exchange_acquire_release_ordering_t1(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t y_val = 0;
+        // spin until y == 1; y == -1 is the termination request from thread three
+        while ((y_val = hsa_signal_load_acquire(y)) != 1)
+            if (y_val == -1) return;
+
+        // claim the turn: y becomes 0 while x[] is being processed
+        hsa_signal_exchange_acquire(y, 0);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should be 2 (initial value, or written by thread two)
+            hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]);
+            // hsa_signal_value_t may be 64 bits wide, so print it as long long
+            ASSERT_MSG(sig_val_t1 == 2, "signal value should be 2, but is %lld\n", (long long)sig_val_t1);
+
+            // change x value to 1
+            hsa_signal_store_relaxed(x[ii], 1);
+        }
+
+        // set y to 2 so that thread three can pass the turn to thread two
+        hsa_signal_store_release(y, 2);
+    }
+}
+
+
+/*
+ * Thread two.
+ *
+ * Mirror image of thread one on signal z: spins until z == 2 (or returns when
+ * z == -1), exchanges 0 into z, verifies that every x[] value is 1, rewrites
+ * each to 2 and then stores 1 into z with release semantics.
+ *
+ * data: unused.
+ */
+void test_signal_exchange_acquire_release_ordering_t2(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t z_val = 0;
+        // spin until z == 2; z == -1 is the termination request from thread three
+        while ((z_val = hsa_signal_load_acquire(z)) != 2)
+            if (z_val == -1) return;
+
+        // claim the turn: z becomes 0 while x[] is being processed
+        hsa_signal_exchange_acquire(z, 0);
+
+        for (ii = 0; ii < NUM_X; ++ii) {
+            // every x value should be 1 (written by thread one)
+            hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]);
+            // hsa_signal_value_t may be 64 bits wide, so print it as long long
+            ASSERT_MSG(sig_val_t2 == 1, "signal value should be 1, but is %lld\n", (long long)sig_val_t2);
+
+            // change x value to 2
+            hsa_signal_store_relaxed(x[ii], 2);
+        }
+
+        // set z to 1 so that thread three can pass the turn back to thread one
+        hsa_signal_store_release(z, 1);
+    }
+}
+
+
+/*
+ * Thread three.
+ *
+ * Relays the turn between threads one and two for NUM_ITER_MEM_ORD rounds
+ * using hsa_signal_exchange_release, then stores -1 into y and z so that both
+ * of them return.
+ *
+ * data: unused.
+ */
+void test_signal_exchange_acquire_release_ordering_t3(void *data) {
+    int ii;
+    for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) {
+        // wait until y == 2, i.e. thread one has finished its pass
+        hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, 2, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+
+        // set z to 2 to start thread two
+        hsa_signal_exchange_release(z, 2);
+
+        // wait until z == 1, i.e. thread two has finished its pass
+        hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+
+        // set y to 1 to start thread one again
+        hsa_signal_exchange_release(y, 1);
+    }
+
+    // the last exchange above started thread one once more; wait until it has
+    // finished that pass (y == 2) before requesting termination
+    while (hsa_signal_load_relaxed(y) != 2);
+    hsa_signal_store_release(y, -1);
+    hsa_signal_store_release(z, -1);
+
+    return;
+}
+
+/*
+ * Test entry point: creates the signals, runs the three threads above in a
+ * test group and releases every resource afterwards.
+ *
+ * Returns 0; failures are reported through ASSERT.
+ */
+int test_signal_exchange_acquire_release_ordering_transitive() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < NUM_X; ++ii) {
+        // initialize all x values to 2
+        status = hsa_signal_create(2, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // initialize y to 1, so thread one takes the first turn
+    status = hsa_signal_create(1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // initialize z to 0, so thread two waits for thread three
+    status = hsa_signal_create(0, 0, NULL, &z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // create test_group
+    struct test_group *test = test_group_create(3);
+    ASSERT(test != NULL);
+
+    // add test func one to the test group
+    test_group_add(test, test_signal_exchange_acquire_release_ordering_t1, NULL, 1);
+
+    // add test func two to the test_group
+    test_group_add(test, test_signal_exchange_acquire_release_ordering_t2, NULL, 1);
+
+    // add test func three to the test_group
+    test_group_add(test, test_signal_exchange_acquire_release_ordering_t3, NULL, 1);
+
+    // create threads for each test
+    test_group_thread_create(test);
+
+    // start test functions
+    test_group_start(test);
+
+    // wait until all test functions finish
+    test_group_wait(test);
+
+    // exit all tests
+    test_group_exit(test);
+
+    // cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < NUM_X; ++ii) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_signal_destroy(z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_exchange_atomic.c b/src/core/signals/test_signal_exchange_atomic.c
new file mode 100644
index 0000000..907325e
--- /dev/null
+++ b/src/core/signals/test_signal_exchange_atomic.c
@@ -0,0 +1,461 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_exchange_atomic + * + * Purpose: + * Verify atomicity feature of signal operation + * + * Description: + * + * 1) Create a signal. + * Create 4 threads, that + * call hsa_signal_exchange_acquire, the first to 0, the second to all bits set, + * the third with alternating 1's and 0's with 0 in the first bit and + * the fourth with alternating 1's and 0's with 1 in the first bit. + * Run the threads for millions of iterations of exchanges, with no + * explicit synchronization between the threads. + * + * 2) Create a signal. + * Create 4 threads, that + * call hsa_signal_exchange_release, the first to 0, the second to all bits set, + * the third with alternating 1's and 0's with 0 in the first bit and + * the fourth with alternating 1's and 0's with 1 in the first bit. + * Run the threads for millions of iterations of exchanges, with no + * explicit synchronization between the threads. + * + * 3) Create a signal. 
+ *   Create 4 threads, that
+ *   call hsa_signal_exchange_relaxed, the first to 0, the second to all bits set,
+ *   the third with alternating 1's and 0's with 0 in the first bit and
+ *   the fourth with alternating 1's and 0's with 1 in the first bit.
+ *   Run the threads for millions of iterations of exchanges, with no
+ *   explicit synchronization between the threads.
+ *
+ * 4) Create a signal.
+ *   Create 4 threads, that
+ *   call hsa_signal_exchange_acq_rel, the first to 0, the second to all bits set,
+ *   the third with alternating 1's and 0's with 0 in the first bit and
+ *   the fourth with alternating 1's and 0's with 1 in the first bit.
+ *   Run the threads for millions of iterations of exchanges, with no
+ *   explicit synchronization between the threads.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "config.h"
+
+typedef struct test_group test_group;
+
+// Selects which hsa_signal_exchange_* variant a worker thread exercises.
+typedef enum OP_TYPE_T {
+    OP_TYPE_ACQUIRE,
+    OP_TYPE_ACQ_REL,
+    OP_TYPE_RELEASE,
+    OP_TYPE_RELAXED
+} OP_TYPE_T;
+
+// Bit patterns written by the four workers, sized to the signal value width.
+#ifdef HSA_LARGE_MODEL
+    #define NO_BITS 0x0000000000000000
+    #define ALL_BITS 0xffffffffffffffff
+    #define ALT_BITS_1 0x5555555555555555
+    #define ALT_BITS_2 0xaaaaaaaaaaaaaaaa
+#else
+    #define NO_BITS 0x00000000
+    #define ALL_BITS 0xffffffff
+    #define ALT_BITS_1 0x55555555
+    #define ALT_BITS_2 0xaaaaaaaa
+#endif
+
+// Number of worker threads; one per bit pattern above.
+#define NUM_EXCHANGE_THREADS 4
+
+// Parameters handed to each worker thread.
+typedef struct {
+    hsa_signal_t signal_handle;  // signal shared by all workers
+    int num;                     // worker index, selects the bit pattern
+    OP_TYPE_T type;              // exchange variant to call
+} param;
+
+/*
+ * Worker thread body: exchanges one fixed bit pattern into the shared signal
+ * OP_COUNT times, using the exchange variant selected by param.type.
+ *
+ * The main thread opens the runtime before the workers are created and closes
+ * it after they finish, so the runtime is not opened here.
+ *
+ * data: pointer to this worker's param.
+ */
+static void child_func(void* data) {
+    param* param_ptr = (param*)data;
+    hsa_signal_t signal_handle = param_ptr->signal_handle;
+    int num = param_ptr->num;
+    OP_TYPE_T type = param_ptr->type;
+    hsa_signal_value_t signal_value;
+    int ii;
+
+    // Each worker writes its own bit pattern
+    switch (num) {
+        case 0:
+            signal_value = NO_BITS;
+            break;
+        case 1:
+            signal_value = ALL_BITS;
+            break;
+        case 2:
+            signal_value = ALT_BITS_1;
+            break;
+        case 3:
+            signal_value = ALT_BITS_2;
+            break;
+        default:
+            ASSERT(num < 4);
+            // never continue with an uninitialized pattern
+            return;
+    }
+
+    // Every case ends with break so that exactly one exchange variant runs;
+    // without it a worker would fall through into the loops that follow.
+    switch (type) {
+        case OP_TYPE_ACQUIRE:
+            for (ii = 0; ii < OP_COUNT; ++ii) {
+                hsa_signal_exchange_acquire(signal_handle, signal_value);
+            }
+            break;
+        case OP_TYPE_ACQ_REL:
+            for (ii = 0; ii < OP_COUNT; ++ii) {
+                hsa_signal_exchange_acq_rel(signal_handle, signal_value);
+            }
+            break;
+        case OP_TYPE_RELEASE:
+            for (ii = 0; ii < OP_COUNT; ++ii) {
+                hsa_signal_exchange_release(signal_handle, signal_value);
+            }
+            break;
+        case OP_TYPE_RELAXED:
+            for (ii = 0; ii < OP_COUNT; ++ii) {
+                hsa_signal_exchange_relaxed(signal_handle, signal_value);
+            }
+            break;
+    }
+
+    return;
+}
+
+/*
+ * Shared implementation of the four tests below: runs NUM_EXCHANGE_THREADS
+ * workers that concurrently exchange their bit patterns into one signal with
+ * the given exchange variant, then checks that the final signal value is one
+ * of the four patterns.
+ *
+ * type: exchange variant the workers call.
+ * Returns 0; failures are reported through ASSERT / ASSERT_MSG.
+ */
+static int run_exchange_atomic_test(OP_TYPE_T type) {
+    hsa_status_t status;
+    // Worker parameters live in this frame instead of on the heap, so there
+    // is nothing to free; the group is exited and destroyed before returning.
+    param params[NUM_EXCHANGE_THREADS];
+    int ii;
+
+    // Open HsaRt
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    hsa_signal_value_t initial_value = 0;
+
+    // Create a new signal
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create test group with one slot per worker
+    test_group* group_ptr = NULL;
+    group_ptr = test_group_create(NUM_EXCHANGE_THREADS);
+    ASSERT(NULL != group_ptr);
+
+    // Set parameter structure for each thread and add it to the group
+    for (ii = 0; ii < NUM_EXCHANGE_THREADS; ++ii) {
+        params[ii].signal_handle = signal_handle;
+        params[ii].num = ii;
+        params[ii].type = type;
+    }
+    for (ii = 0; ii < NUM_EXCHANGE_THREADS; ++ii) {
+        test_group_add(group_ptr, child_func, &params[ii], 1);
+    }
+
+    // Create threads for tests
+    test_group_thread_create(group_ptr);
+    // Set run flag to let multi-thread running
+    test_group_start(group_ptr);
+    test_group_wait(group_ptr);
+
+    // Here, we can use load acquire or relaxed, but store must be release to make sure signal is set to 0
+    hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+    hsa_signal_store_release(signal_handle, 0);
+    ASSERT_MSG(NO_BITS == loaded_value ||
+               ALL_BITS == loaded_value ||
+               ALT_BITS_1 == loaded_value ||
+               ALT_BITS_2 == loaded_value,
+               "Signal value is not what is expected!\n");
+
+    // Exit current test group
+    test_group_exit(group_ptr);
+    test_group_destroy(group_ptr);
+
+    // Destroy the resource
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1 (hsa_signal_exchange_acquire)
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_signal_exchange_atomic_acquire() {
+    return run_exchange_atomic_test(OP_TYPE_ACQUIRE);
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #2 (hsa_signal_exchange_release)
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_signal_exchange_atomic_release() {
+    return run_exchange_atomic_test(OP_TYPE_RELEASE);
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #3 (hsa_signal_exchange_relaxed)
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_signal_exchange_atomic_relaxed() {
+    return run_exchange_atomic_test(OP_TYPE_RELAXED);
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4 (hsa_signal_exchange_acq_rel)
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_signal_exchange_atomic_acq_rel() {
+    return run_exchange_atomic_test(OP_TYPE_ACQ_REL);
+}
diff --git a/src/core/signals/test_signal_handle_width.c b/src/core/signals/test_signal_handle_width.c
new file mode 100644
index 0000000..5def242
--- /dev/null
+++ b/src/core/signals/test_signal_handle_width.c
@@ -0,0 +1,74 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_handle_width + * Scope: Conformance + * + * Purpose: Verifies that hsa_signal_handle_t data type has the correct + * width + * + * Test Description: + * 1) Check the size of the hsa_signal_value_t data type. + * + * Expected Results: The width of the data type 64 bits. + */ + +#include +#include +#include "config.h" + +int test_signal_handle_width() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + hsa_signal_t signal; + + int size =s izeof(signal); + ASSERT(size == 8); + + return 0; +} diff --git a/src/core/signals/test_signal_kernel_multi_set.c b/src/core/signals/test_signal_kernel_multi_set.c new file mode 100644 index 0000000..d761422 --- /dev/null +++ b/src/core/signals/test_signal_kernel_multi_set.c @@ -0,0 +1,256 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_kernel_multi_set + * Scope: Conformance + * + * Purpose: Verifies that a kernel can be executed and that the kernel + * workitems can modify different signal's concurrently. + * + * Test Description: + * 1) Create several signals. + * 2) Launch a kernel with a several workitems, one for each signal. + * The kernel should be launched with HSA_FENCE_SCOPE_SYSTEM + * for both the acquire and release scopes. 
+ * 3) Use an HSAIL instruction to modify the value of the signal associated with + * the workitem. + * 4) After the kernel finishes executing, check the values of the signals using a + * hsa_signal_ld API. + * + * Expected Results: After the kernel finishes executing, the value of all the + * signals should be modified to the correct values. + */ + +#include +#include +#include +#include +#include + +int test_signal_kernel_multi_set() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("signal_operations.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 != global_region.handle) { + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + 
memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__signal_st_rlx_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // The kernarg data structure + typedef struct __attribute__ ((aligned(16))) signal_args_s { + uint32_t count; + hsa_signal_t* signal_handles; + hsa_signal_value_t* signal_values; + } signal_args_t; + signal_args_t signal_args; + + // Allocate the kernel argument buffer from the correct region + signal_args_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, + symbol_record.kernarg_segment_size, + (void**)(&kernarg_buffer)); + ASSERT(HSA_STATUS_SUCCESS == status); + + const size_t num_kernels = 16; + + // Create the completion signal + hsa_signal_t completion_signal; + status = hsa_signal_create(1, 0, NULL, &completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the kernel signal + hsa_signal_t kernel_signals[num_kernels]; + int jj; + for (jj = 0; jj < num_kernels; ++jj) { + status = hsa_signal_create(1, 0, NULL, kernel_signals + jj); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Allocate and initialize the set value + hsa_signal_value_t *set_values; + status = hsa_memory_allocate(global_region, num_kernels * sizeof(hsa_signal_value_t), (void**) &set_values); + ASSERT(HSA_STATUS_SUCCESS == status); + memset(set_values, 0, num_kernels * 
sizeof(hsa_signal_value_t)); + + // Fill in the kernel argument list + signal_args.count = num_kernels; + signal_args.signal_handles = kernel_signals; + signal_args.signal_values = set_values; + memcpy(kernarg_buffer, &signal_args, symbol_record.kernarg_segment_size); + + // Setup the dispatch packet. + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = num_kernels; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = num_kernels; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernarg_address = (void*) kernarg_buffer; + dispatch_packet.completion_signal = completion_signal; + + // Dispatch the kernel + enqueue_dispatch_packet(queue, &dispatch_packet); + + // Wait on the completion signal + hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Verify the signal values were changed by the kernel + for (jj = 0; jj < num_kernels; ++jj) { + hsa_signal_value_t val = hsa_signal_load_relaxed(kernel_signals[jj]); + ASSERT(set_values[jj] == val); + } + + for (jj = 0; jj < num_kernels; ++jj) { + status = hsa_signal_destroy(kernel_signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = 
hsa_signal_destroy(completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(set_values); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/signals/test_signal_kernel_multi_wait.c b/src/core/signals/test_signal_kernel_multi_wait.c new file mode 100644 index 0000000..6186abb --- /dev/null +++ b/src/core/signals/test_signal_kernel_multi_wait.c @@ -0,0 +1,250 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_kernel_wait + * Scope: Conformance + * + * Purpose: Verifies that a kernel can wait on several signals simultaneously, + * one for each workitem. + * + * Test Description: + * 1) Create several signals. + * 2) Launch a kernel with a several workitems, one for each signal. + * The kernel should be launched with HSA_FENCE_SCOPE_SYSTEM + * for both the acquire and release scopes. 
+ * 3) Use an HSAIL instruction to wait on the value of the appropriate signal + * in the workitem. + * 4) Modify the value of the each signal on the host side. + * + * Expected Results: The kernel should be able to wait on the signals and wake + * up after all the signal values have changed. + * + */ + +#include +#include +#include +#include +#include + +int test_signal_kernel_multi_wait() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("signal_operations.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region, &global_region); + if ((uint64_t)-1 != global_region.handle) { + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); 
+ hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__signal_wait_eq_rlx_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // The kernarg data structure + typedef struct __attribute__ ((aligned(16))) signal_args_s { + uint32_t count; + hsa_signal_t* signal_handles; + hsa_signal_value_t* wait_values; + } signal_args_t; + signal_args_t signal_args; + + // Allocate the kernel argument buffer from the correct region + signal_args_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, + symbol_record.kernarg_segment_size, + (void**)(&kernarg_buffer)); + ASSERT(HSA_STATUS_SUCCESS == status); + + const size_t num_kernels = 16; + + // Create the completion signal + hsa_signal_t completion_signal; + status = hsa_signal_create(1, 0, NULL, &completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the kernel signal + hsa_signal_t kernel_signals[num_kernels]; + int jj; + for (jj = 0; jj < num_kernels; ++jj) { + status = hsa_signal_create(1, 0, NULL, kernel_signals + jj); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Allocate and initialize the wait values + hsa_signal_value_t *wait_values; + status = hsa_memory_allocate(global_region, num_kernels * sizeof(hsa_signal_value_t), (void**) &wait_values); + ASSERT(HSA_STATUS_SUCCESS == status); + memset(wait_values, 0, num_kernels * sizeof(hsa_signal_value_t)); + + // Fill in the kernel argument list + 
signal_args.count = num_kernels; + signal_args.signal_handles = kernel_signals; + signal_args.wait_values = wait_values; + memcpy(kernarg_buffer, &signal_args, symbol_record.kernarg_segment_size); + + // Setup the dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = num_kernels; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = num_kernels; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernarg_address = (void*) kernarg_buffer; + dispatch_packet.completion_signal = completion_signal; + + // Dispatch the kernel + enqueue_dispatch_packet(queue, &dispatch_packet); + + // Set the signal value to the wait_value + for (jj = 0; jj < num_kernels; ++jj) { + hsa_signal_store_relaxed(kernel_signals[jj], wait_values[jj]); + } + + // Wait on the completion signal + hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + for (jj = 0; jj < num_kernels; ++jj) { + status = hsa_signal_destroy(kernel_signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_signal_destroy(completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(wait_values); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy program + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/signals/test_signal_kernel_set.c b/src/core/signals/test_signal_kernel_set.c new file mode 100644 index 0000000..b2c8ea8 --- /dev/null +++ b/src/core/signals/test_signal_kernel_set.c @@ -0,0 +1,248 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * + * Test Name: signal_kernel_set + * Scope: Conformance + * + * Purpose: Verifies that a signal handle can be passed as a kernel argument + * and used during kernel execution. + * + * Test Description: + * 1) Create a signal. + * 2) Launch a kernel with a single workitem, passing the signal as + * a kernel argument. The kernel should be launched with HSA_FENCE_SCOPE_SYSTEM + * for both the acquire and release scopes. + * 3) Use an HSAIL instruction to modify the value of the signal. + * 4) After the kernel finishes executing, check the value using a + * hsa_signal_ld API. + * + * Expected Results: After the kernel finishes executing, the value of the + * signal should be modified to the correct value. 
+ * + */ + +#include +#include +#include +#include +#include + +int test_signal_kernel_set() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("signal_operations.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 != global_region.handle) { + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + 
&code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__signal_st_rlx_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // The kernarg data structure + typedef struct __attribute__ ((aligned(16))) signal_args_s { + uint32_t count; + hsa_signal_t* signal_handles; + hsa_signal_value_t* signal_values; + } signal_args_t; + signal_args_t signal_args; + + // Allocate the kernel argument buffer from the correct region + signal_args_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, + symbol_record.kernarg_segment_size, + (void**)(&kernarg_buffer)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the completion signal + hsa_signal_t completion_signal; + status = hsa_signal_create(1, 0, NULL, &completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the kernel signal + hsa_signal_t kernel_signal; + status = hsa_signal_create(1, 0, NULL, &kernel_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate and initialize the set value + hsa_signal_value_t *set_value; + status = hsa_memory_allocate(global_region, sizeof(hsa_signal_value_t), (void**) &set_value); + ASSERT(HSA_STATUS_SUCCESS == status); + *set_value = 0; + + // Fill in the kernel argument list + signal_args.count = 1; + signal_args.signal_handles = &kernel_signal; + signal_args.signal_values = set_value; + memcpy(kernarg_buffer, &signal_args, symbol_record.kernarg_segment_size); + + // Setup the dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << 
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 1; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernarg_address = (void*) kernarg_buffer; + dispatch_packet.completion_signal = completion_signal; + + // Dispatch the kernel + enqueue_dispatch_packet(queue, &dispatch_packet); + + // Wait on the completion signal + hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Check kernel signal + *set_value = hsa_signal_load_relaxed(kernel_signal); + ASSERT(0 == *set_value); + + status = hsa_signal_destroy(kernel_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_signal_destroy(completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(set_value); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); 
+ + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/signals/test_signal_kernel_wait.c b/src/core/signals/test_signal_kernel_wait.c new file mode 100644 index 0000000..c149947 --- /dev/null +++ b/src/core/signals/test_signal_kernel_wait.c @@ -0,0 +1,244 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_kernel_wait + * Scope: Conformance + * + * Purpose: Verifies that a signal handle can be passed as a kernel argument + * and waited on by a kernel. + * + * Test Description: + * 1) Create a signal. + * 2) Launch a kernel with a single workitem, passing the signal as + * a kernel argument. The kernel should be launched with HSA_FENCE_SCOPE_SYSTEM + * for both the acquire and release scopes. + * 3) Use an HSAIL instruction to wait on the value of the signal. + * 4) Modify the value of the signal on the host side. + * + * Expected Results: The kernel should be able to wait on the signal and wake + * up after the signal value is changed. + */ + +#include +#include +#include +#include +#include + +int test_signal_kernel_wait() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("signal_operations.brig", &module)); + + // Get a list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + // Check if the queue supports dispatch + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Find a memory region that supports fine grained memory + hsa_region_t global_region; + global_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + if ((uint64_t)-1 != 
global_region.handle) { + continue; + } + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Create a queue + hsa_queue_t* queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + char* symbol_names[1]; + symbol_names[0] = "&__signal_wait_eq_rlx_kernel"; + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, symbol_names, &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // The kernarg data structure + typedef struct __attribute__ ((aligned(16))) signal_args_s { + uint32_t count; + hsa_signal_t* signal_handles; + hsa_signal_value_t* wait_values; + } signal_args_t; + signal_args_t signal_args; + + // Allocate the kernel argument buffer from the correct region + signal_args_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, + symbol_record.kernarg_segment_size, + (void**)(&kernarg_buffer)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the completion signal + hsa_signal_t completion_signal; + status = hsa_signal_create(1, 0, NULL, 
&completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the kernel signal + hsa_signal_t kernel_signal; + status = hsa_signal_create(1, 0, NULL, &kernel_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate and initialize the wait value + hsa_signal_value_t *wait_value; + status = hsa_memory_allocate(global_region, sizeof(hsa_signal_value_t), (void**) &wait_value); + ASSERT(HSA_STATUS_SUCCESS == status); + *wait_value = 0; + + // Fill in the kernel argument list + signal_args.count = 1; + signal_args.signal_handles = &kernel_signal; + signal_args.wait_values = wait_value; + memcpy(kernarg_buffer, &signal_args, symbol_record.kernarg_segment_size); + + // Setup the dispatch packet. + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 1; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 1; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernarg_address = (void*) kernarg_buffer; + dispatch_packet.completion_signal = completion_signal; + + // Dispatch the kernel + enqueue_dispatch_packet(queue, &dispatch_packet); + + // Set the signal value to the wait_value + hsa_signal_store_relaxed(kernel_signal, *wait_value); + + // Wait on 
the completion signal + hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + status = hsa_signal_destroy(kernel_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_signal_destroy(completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(wait_value); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Destroy the loaded module + destroy_module(module); + + // Shutdown runtime + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/signals/test_signal_load_store_atomic.c b/src/core/signals/test_signal_load_store_atomic.c new file mode 100644 index 0000000..81f646b --- /dev/null +++ b/src/core/signals/test_signal_load_store_atomic.c @@ -0,0 +1,167 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_load_store_atomic + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_load and store operations are atomic, + * and 'torn' loads or stores do not occur when these APIs are executed + * concurrently. + * + * Test Description: + * 1) Create a signal. + * 2) Create 2 threads, that + * a) Update the signal value, the first to 0 and the second to INT(32|64)_MAX. 
+ * 3) Create 2 threads, that + * b) Read the signal value, and check if it 0 or INT(32|64)_MAX. + * 4) Run the threads for millions of iterations of loads and stores, with no + * explicit synchronization between the threads. + * 5) Repeat for all versions of the hsa_signal_load and store APIs, i.e. acquire, + * release and relaxed memory ordering versions. + * + * Expected Results: The reading threads should only see two possible signal values, + * 0 or INT(32|64)_MAX. + */ + +#include +#include +#include +#include + +#ifdef HSA_LARGE_MODEL +#define MAX_VAL INT64_MAX +#else +#define MAX_VAL INT32_MAX +#endif + +#define NUM_ITER 10000 + +// Set signal value to zero +void set_signal_zero(void *data) { + hsa_signal_t signal = (*(hsa_signal_t *)data); + int rand_num = rand() % 2; + // Randomly choose store function + if (rand_num) + hsa_signal_store_relaxed(signal, 0); + else + hsa_signal_store_release(signal, 0); +} + +// Set signal value to INT(32|64)_MAX +void set_signal_max(void *data) { + hsa_signal_t signal = (*(hsa_signal_t *)data); + int rand_num = rand() % 2; + // Randomly choose store function + if (rand_num) + hsa_signal_store_relaxed(signal, MAX_VAL); + else + hsa_signal_store_release(signal, MAX_VAL); +} + +// Read signal value and check if the returned value is either ZERO or MAX +void read_signal(void *data) { + hsa_signal_t signal = (*(hsa_signal_t *)data); + int rand_num = rand() % 2; + hsa_signal_value_t signal_val; + // Randomly choose load function + if (rand_num) + signal_val = hsa_signal_load_acquire(signal); + else + signal_val = hsa_signal_load_relaxed(signal); + ASSERT(signal_val == 0 || signal_val == MAX_VAL); +} + +int test_signal_load_store_atomic() { + // Init hsa_runtime + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Create signal + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Create 2 threads for setting signal value, 
one is to set signal value to zero, + // one is to set signal value to MAX + struct test_group *tg_set_signal = test_group_create(2); + test_group_add(tg_set_signal, &set_signal_zero, &signal, 1); + test_group_add(tg_set_signal, &set_signal_max, &signal, 1); + test_group_thread_create(tg_set_signal); + + + // Create 2 threads for loading signal value + struct test_group *tg_load_signal = test_group_create(2); + test_group_add(tg_load_signal, &read_signal, &signal, 2); + test_group_thread_create(tg_load_signal); + + int ii; + for (ii = 0; ii < NUM_ITER; ++ii) { + // Start threads for setting signal value and wait them finish + test_group_start(tg_set_signal); + test_group_wait(tg_set_signal); + + // Start threads for reading signal value + test_group_start(tg_load_signal); + test_group_wait(tg_load_signal); + } + + // Exit threads + test_group_exit(tg_set_signal); + test_group_exit(tg_load_signal); + + // Cleanup resources + test_group_destroy(tg_set_signal); + test_group_destroy(tg_load_signal); + + status = hsa_signal_destroy(signal); + ASSERT(status == HSA_STATUS_SUCCESS); + + status= hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/signals/test_signal_or_acq_rel_ordering.c b/src/core/signals/test_signal_or_acq_rel_ordering.c new file mode 100644 index 0000000..94cb484 --- /dev/null +++ b/src/core/signals/test_signal_or_acq_rel_ordering.c @@ -0,0 +1,176 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_or_release_ordering + * + * Purpose: + * Verify ordering feature of signal operation + * + * Description: + * + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized to have + * the last bit set. + * Create a control signal, denoted by y, also initialized to have only + * the last bit set. 
+ * Start one thread that + * a) Checks the value of y in a loop using hsa_cas_acq_rel using + * 0 as the exchange value. + * b) When the value of y has only the last bit set, the thread stops looping, and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting all x values to have their last bit set . + * d) Replaces the x values by using signal_load_relaxed to set all bits, + * and then uses signal_and_relaxed to mask out all bits but the first. + * e) Replaces the value of y by using signal_load_relaxed to set all bits, + * and then uses signal_or_acq_rel to mask out all bits but the first. + * f) Starts over. + * Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of y is set, not the last. + * Let both threads run for 32K iterations. + * + */ + +#include +#include +#include +#include "config.h" + +typedef struct { + hsa_signal_t* signal_x; + hsa_signal_t signal_y; +} param; + +static void* test_signal_or_acq_rel_ordering_t1(void* arg) { + param* param_ptr = (param*)arg; + hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = param_ptr->signal_y; + + hsa_signal_value_t value; + int jj; + for (jj = 0; jj < NUM_ITERATION; ++jj) { + while (1 != hsa_signal_cas_acq_rel(signal_y, 1, 0)); + int ii; + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + value = hsa_signal_load_relaxed(signal_x[ii]); + ASSERT_MSG(1 == value, "The value of signal_x[%d] is not equal to 1!\n", ii); + // Set all bits of each x and mask out all bits but first + hsa_signal_store_relaxed(signal_x[ii], 0); + hsa_signal_or_relaxed(signal_x[ii], FIRST_BIT); + } + hsa_signal_store_relaxed(signal_y, 0); + hsa_signal_or_acq_rel(signal_y, FIRST_BIT); + } + return NULL; +} + +static void* test_signal_or_acq_rel_ordering_t2(void* arg) { + param* param_ptr = (param*)arg; + hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = 
param_ptr->signal_y; + + hsa_signal_value_t value; + int jj; + for (jj = 0; jj < NUM_ITERATION; ++jj) { + while (FIRST_BIT != hsa_signal_cas_acq_rel(signal_y, FIRST_BIT, 0)); + int ii; + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + value = hsa_signal_load_relaxed(signal_x[ii]); + ASSERT_MSG(FIRST_BIT == value, "The value of x[%d] is not equal to the min negative value!\n", ii); + // Set all bits of each x and mask out all bits but last + hsa_signal_store_relaxed(signal_x[ii], 0); + hsa_signal_or_relaxed(signal_x[ii], 1); + } + hsa_signal_store_relaxed(signal_y, 0); + hsa_signal_or_acq_rel(signal_y, 1); + } + return NULL; +} + +int test_signal_or_acq_rel_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_x[NUM_SIGNAL], signal_y; + // Set value of signal_x and signal_y to 1 + hsa_signal_value_t initial_value = 1; + int ii; + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + status = hsa_signal_create(initial_value, 0, NULL, &signal_x[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_create(initial_value, 0, NULL, &signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Prepare data for threads + param arg[2]; + arg[0].signal_x = signal_x; + arg[0].signal_y = signal_y; + arg[1].signal_x = signal_x; + arg[1].signal_y = signal_y; + + pthread_t id[2]; + pthread_create(&id[0], NULL, test_signal_or_acq_rel_ordering_t1, &arg[0]); + pthread_create(&id[1], NULL, test_signal_or_acq_rel_ordering_t2, &arg[1]); + + pthread_join(id[0], NULL); + pthread_join(id[1], NULL); + + // Destroy signal + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + status = hsa_signal_destroy(signal_x[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_destroy(signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_or_acq_rel_ordering_transitive.c 
b/src/core/signals/test_signal_or_acq_rel_ordering_transitive.c new file mode 100644 index 0000000..89bd45a --- /dev/null +++ b/src/core/signals/test_signal_or_acq_rel_ordering_transitive.c @@ -0,0 +1,241 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_or_acq_rel_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_or_acq_rel API + * enforces transitive memory ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to have their last bit set. + * 2) Create two control signals, denoted by y and z. The initial + * y signal value should have its last bit set and the z signal + * value should be 0. + * 3) Start one thread that + * a) Uses signal_load_relaxed to load the value of y in a loop, stopping + * when the value has the last bit set. + * b) Uses signal_store_relaxed to clear all of the y bits and then calls + * signal_or_acq_rel using y as a parameter but not changing the value. + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting each value to have the last bit set. + * d) Replaces the x values by using signal_store_relaxed to clear all bits, + * and then signal_or_relaxed to set the first bit. + * e) Replaces the value of y by using signal_store_relaxed to clear all bits, + * and then uses signal_or_acq_rel to set the first bit. + * f) Starts over. + * g) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of z is set, not the last. It also operates on signal z, + * not y. + * 5) Start a third thread that + * a) Waits until y has its first bit set using signal_wait_acquire.
+ * c) Sets the value of z to have its first bit set using signal_store_relaxed + * to clear all bits and then signal_or_release to set the first. + * e) Waits until z has only its last bit set with signal_wait_acquire. + * f) Sets the value y to have its last bit set using signal_store_relaxed + * to set all bits and then signal_or_release to set the last bit. + * g) Starts over. + * h) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 5) Let both threads run for thousands of iterations. + * + * Expected Results: For each cycle all memory operations that occured in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa. + */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; + +void test_signal_or_acq_rel_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // loop until last bit of y has been set or y = -1 + while ((y_val = hsa_signal_load_relaxed(y)) != LAST_BIT) + if (y_val == -1) return; + + hsa_signal_store_relaxed(y, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // only last bit of every x should be set + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == LAST_BIT, "only last bit of x value should be set\n"); + + // change first bit of x to 1 + hsa_signal_store_relaxed(x[ii], 0); + hsa_signal_or_relaxed(x[ii], FIRST_BIT); + } + + // set first bit of y + hsa_signal_or_acq_rel(y, FIRST_BIT); + } + return; +} + +void test_signal_or_acq_rel_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // loop until first bit of z has been set or z = -1 + while ((z_val = hsa_signal_load_relaxed(z)) != FIRST_BIT) + if (z_val == -1) return; + + hsa_signal_store_relaxed(z, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // only first bit of every x should be set + hsa_signal_value_t sig_val_t2 = 
hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == FIRST_BIT, "only first bit of x value should be set\n"); + + // change last bit of x + hsa_signal_store_relaxed(x[ii], 0); + hsa_signal_or_relaxed(x[ii], LAST_BIT); + } + + // set last bit of z + hsa_signal_or_acq_rel(z, LAST_BIT); + } + return; +} + +void test_signal_or_acq_rel_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // wait until first bit of y has been set up + hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, FIRST_BIT, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set first bit of z + hsa_signal_store_relaxed(z, 0); + hsa_signal_or_release(z, FIRST_BIT); + + // wait until last bit of z has been set up + hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, LAST_BIT, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // set last bit of y to 1 + hsa_signal_store_relaxed(y, 0); + hsa_signal_or_release(y, LAST_BIT); + } + + // set y to -1 until t1 finish to avoid deadlock + while (hsa_signal_load_relaxed(y) != FIRST_BIT); + hsa_signal_store_release(y, -1); + hsa_signal_store_release(z, -1); + + return; +} + +int test_signal_or_acq_rel_ordering_transitive() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize all x values with setting last bit + status = hsa_signal_create(LAST_BIT, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y to 0 + status = hsa_signal_create(LAST_BIT, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // initialize z to 1 + status = hsa_signal_create(0, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(3); + + // add test func one to the test group + test_group_add(test, test_signal_or_acq_rel_ordering_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_or_acq_rel_ordering_t2, NULL, 1); + + // add test 
func three to the test_group + test_group_add(test, test_signal_or_acq_rel_ordering_t3, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait all tests functions finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_signal_destroy(z); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_or_acquire_release_ordering.c b/src/core/signals/test_signal_or_acquire_release_ordering.c new file mode 100644 index 0000000..3581877 --- /dev/null +++ b/src/core/signals/test_signal_or_acquire_release_ordering.c @@ -0,0 +1,182 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_or_acquires_release_ordering + * + * Purpose: + * Verify that the hsa_signal_or_acquire and hsa_signal_or_release + * APIs enforce correct memory ordering. + * + * Description: + * + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized to have + * the last bit set. 
+ * Create a control signal, denoted by y, also initialized to have only + * the last bit set. + * Start one thread that + * a) Checks the value of y in a loop using hsa_signal_cas_relaxed, with the + * last bit as the expected value and 0 as the exchange value. + * b) When the value of y has only the last bit set, the thread stops looping, and + * c) Issues an acquire on y using hsa_signal_or_acquire with a mask of 0, + * which leaves the value at zero. + * d) Checks all of the x signal values with the signal_load_relaxed + * API, expecting all x values to have their last bit set. + * e) Replaces the x values by using signal_store_relaxed to clear all bits, + * and then uses signal_or_relaxed to set only the first bit. + * f) Replaces the value of y by using signal_store_relaxed to clear all bits, + * and then uses signal_or_release to set only the first bit. + * g) Starts over. + * Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of y is set, not the last. + * Let both threads run for 32K iterations.
+ * + */ + +#include +#include +#include +#include "config.h" + +typedef struct { + hsa_signal_t* signal_x; + hsa_signal_t signal_y; +} param; + +static void* test_signal_or_acquire_release_t1(void* arg) { + param* param_ptr = (param*)arg; + hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = param_ptr->signal_y; + + hsa_signal_value_t value; + int jj; + for (jj = 0; jj < NUM_ITERATION; ++jj) { + while (1 != hsa_signal_cas_relaxed(signal_y, 1, 0)); + hsa_signal_or_acquire(signal_y, 0); + + int ii; + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + value = hsa_signal_load_relaxed(signal_x[ii]); + ASSERT_MSG(1 == value, "The value of signal_x[%d] is not equal to 1!\n", ii); + // Set all bits of each x and mask out all bits but first + hsa_signal_store_relaxed(signal_x[ii], 0); + hsa_signal_or_relaxed(signal_x[ii], FIRST_BIT); + } + hsa_signal_store_relaxed(signal_y, 0); + hsa_signal_or_release(signal_y, FIRST_BIT); + } + return NULL; +} + +static void* test_signal_or_acquire_release_t2(void* arg) { + param* param_ptr = (param*)arg; + hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = param_ptr->signal_y; + + hsa_signal_value_t value; + int jj; + for (jj = 0; jj < NUM_ITERATION; ++jj) { + while (FIRST_BIT != hsa_signal_cas_relaxed(signal_y, FIRST_BIT, 0)); + hsa_signal_or_acquire(signal_y, 0); + + int ii; + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + value = hsa_signal_load_relaxed(signal_x[ii]); + ASSERT_MSG(FIRST_BIT == value, "The value of x[%d] is not equal to the min negative value!\n", ii); + // Set all bits of each x and mask out all bits but last + hsa_signal_store_relaxed(signal_x[ii], 0); + hsa_signal_or_relaxed(signal_x[ii], 1); + } + hsa_signal_store_relaxed(signal_y, 0); + hsa_signal_or_release(signal_y, 1); + } + return NULL; +} + +int test_signal_or_acquire_release_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_x[NUM_SIGNAL], signal_y; + // Set value 
of signal_x and signal_y to 1 + hsa_signal_value_t initial_value = 1; + int ii; + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + status = hsa_signal_create(initial_value, 0, NULL, &signal_x[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_create(initial_value, 0, NULL, &signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Prepare data for threads + param arg[2]; + arg[0].signal_x = signal_x; + arg[0].signal_y = signal_y; + arg[1].signal_x = signal_x; + arg[1].signal_y = signal_y; + + pthread_t id[2]; + pthread_create(&id[0], NULL, test_signal_or_acquire_release_t1, &arg[0]); + pthread_create(&id[1], NULL, test_signal_or_acquire_release_t2, &arg[1]); + + pthread_join(id[0], NULL); + pthread_join(id[1], NULL); + + // Destroy signal + for (ii = 0; ii < NUM_SIGNAL; ++ii) { + status = hsa_signal_destroy(signal_x[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_destroy(signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_or_acquire_release_ordering_transitive.c b/src/core/signals/test_signal_or_acquire_release_ordering_transitive.c new file mode 100644 index 0000000..dd88a10 --- /dev/null +++ b/src/core/signals/test_signal_or_acquire_release_ordering_transitive.c @@ -0,0 +1,240 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_or_acquire_release_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_or_acquire and + * hsa_signal_or_release APIs enforce transitive memory + * ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. All the signal values should be initialized + * to have their last bit set.
+ * 2) Create two control signals, denoted by y and z. The initial + * y signal value should have its last bit set and the z signal + * value should be 0. + * 3) Start one thread that + * a) Uses signal_load_relaxed to load the value of y in a loop, stopping + * when the value has the last bit set. + * b) Uses signal_store_relaxed to clear all of the y bits and then calls + * signal_or_acquire using y as a parameter but not changing the value. + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting each value to have the last bit set. + * d) Replaces the x values by using signal_store_relaxed to clear all bits, + * and then signal_or_relaxed to set the first bit. + * e) Replaces the value of y by using signal_store_relaxed to clear all bits, + * and then uses signal_or_release to set the first bit. + * f) Starts over. + * g) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of z is set, not the last. It also operates on signal z, + * not y. + * 5) Start a third thread that + * a) Waits until y has its first bit set using signal_wait_acquire. + * b) Sets the value of z to have its first bit set using signal_store_relaxed + * to clear all bits and then signal_or_release to set the first. + * c) Waits until z has only its last bit set with signal_wait_acquire. + * d) Sets the value y to have its last bit set using signal_store_relaxed + * to clear all bits and then signal_or_release to set the last bit. + * e) Starts over. + * f) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 6) Let all three threads run for thousands of iterations.
+ *
+ * Expected Results: For each cycle all memory operations that occurred in thread one should
+ * be appropriately ordered in thread two after the y signal value was modified, and vice versa.
+ */
+
+#include
+#include
+#include
+#include "config.h"
+
+// Control signal exchanged between thread one and thread three
+hsa_signal_t y;
+// Control signal exchanged between thread two and thread three
+hsa_signal_t z;
+// Data signals that threads one and two check and rewrite in turn
+hsa_signal_t x[NUM_X];
+
+/*
+ * Thread one.
+ *
+ * Repeats until y is observed as -1: waits for y == LAST_BIT, clears y,
+ * issues an acquire OR on y, checks that every x[ii] equals LAST_BIT,
+ * rewrites each x[ii] to FIRST_BIT and publishes FIRST_BIT on y with
+ * hsa_signal_or_release.
+ *
+ * data: unused.
+ */
+void test_signal_or_acquire_release_ordering_t1(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t y_val = 0;
+        // Loop until last bit of y has been set or y = -1
+        while ((y_val = hsa_signal_load_relaxed(y)) != LAST_BIT)
+            if (y_val == -1) return;
+
+        hsa_signal_store_relaxed(y, 0);
+        // OR with 0 leaves y unchanged; the call is the acquire operation under test
+        hsa_signal_or_acquire(y, 0);
+
+        for (ii = 0; ii < NUM_X; ii++) {
+            // Only last bit of every x should be set
+            hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT_MSG(sig_val_t1 == LAST_BIT, "only last bit of x value should be set\n");
+
+            // Clear x, then set only its first bit
+            hsa_signal_store_relaxed(x[ii], 0);
+            hsa_signal_or_relaxed(x[ii], FIRST_BIT);
+        }
+
+        // Set first bit of y (y was cleared above, so y becomes exactly FIRST_BIT)
+        hsa_signal_or_release(y, FIRST_BIT);
+    }
+}
+
+/*
+ * Thread two.
+ *
+ * Mirror image of thread one on signal z: triggers on z == FIRST_BIT, expects
+ * every x[ii] to equal FIRST_BIT, rewrites each x[ii] to LAST_BIT and
+ * publishes LAST_BIT on z. Returns when z is observed as -1.
+ *
+ * data: unused.
+ */
+void test_signal_or_acquire_release_ordering_t2(void *data) {
+    int ii;
+    while (1) {
+        hsa_signal_value_t z_val = 0;
+        // Loop until first bit of z has been set or z = -1
+        while ((z_val = hsa_signal_load_relaxed(z)) != FIRST_BIT)
+            if (z_val == -1) return;
+
+        hsa_signal_store_relaxed(z, 0);
+        // OR with 0 leaves z unchanged; the call is the acquire operation under test
+        hsa_signal_or_acquire(z, 0);
+
+        for (ii = 0; ii < NUM_X; ii++) {
+            // Only first bit of every x should be set
+            hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT_MSG(sig_val_t2 == FIRST_BIT, "only first bit of x value should be set\n");
+
+            // Clear x, then set only its last bit
+            hsa_signal_store_relaxed(x[ii], 0);
+            hsa_signal_or_relaxed(x[ii], LAST_BIT);
+        }
+
+        // Set last bit of z (z was cleared above, so z becomes exactly LAST_BIT)
+        hsa_signal_or_release(z, LAST_BIT);
+    }
+}
+
+/*
+ * Thread three.
+ *
+ * Relays the hand-off between thread one (via y) and thread two (via z) for
+ * NUM_ITER_MEM_ORD rounds, then stores -1 to both control signals so that the
+ * other two threads return.
+ *
+ * data: unused.
+ */
+void test_signal_or_acquire_release_ordering_t3(void *data) {
+    int ii;
+    for (ii = 0; ii < NUM_ITER_MEM_ORD; ii++) {
+        // Wait until y == FIRST_BIT. The returned value is re-checked because a
+        // wait may resume before the condition is met.
+        while (FIRST_BIT != hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, FIRST_BIT,
+                                                    UINT64_MAX, HSA_WAIT_STATE_BLOCKED));
+
+        // Set first bit of z
+        hsa_signal_store_relaxed(z, 0);
+        hsa_signal_or_release(z, FIRST_BIT);
+
+        // Wait until z == LAST_BIT, again re-checking the returned value
+        while (LAST_BIT != hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, LAST_BIT,
+                                                   UINT64_MAX, HSA_WAIT_STATE_BLOCKED));
+
+        // Set last bit of y
+        hsa_signal_store_relaxed(y, 0);
+        hsa_signal_or_release(y, LAST_BIT);
+    }
+
+    // Wait for thread one to finish its final round (y == FIRST_BIT) before
+    // overwriting y, then tell both threads to terminate by storing -1.
+    while (hsa_signal_load_relaxed(y) != FIRST_BIT);
+    hsa_signal_store_release(y, -1);
+    hsa_signal_store_release(z, -1);
+}
+
+/*
+ * Test entry point.
+ *
+ * Creates the x, y and z signals, runs the three thread functions in a test
+ * group and releases every resource afterwards.
+ *
+ * Returns 0 when the function runs to the end; failures are reported through
+ * the ASSERT macros.
+ */
+int test_signal_or_acquire_release_ordering_transitive() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < NUM_X; ii++) {
+        // Initialize all x values with only the last bit set
+        status = hsa_signal_create(LAST_BIT, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // Initialize y with only the last bit set so that thread one runs first
+    status = hsa_signal_create(LAST_BIT, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // Initialize z to 0
+    status = hsa_signal_create(0, 0, NULL, &z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // Create test_group
+    struct test_group *test = test_group_create(3);
+    ASSERT(NULL != test);
+
+    // Add test func one to the test group
+    test_group_add(test, test_signal_or_acquire_release_ordering_t1, NULL, 1);
+
+    // Add test func two to the test_group
+    test_group_add(test, test_signal_or_acquire_release_ordering_t2, NULL, 1);
+
+    // Add test func three to the test_group
+    test_group_add(test, test_signal_or_acquire_release_ordering_t3, NULL, 1);
+
+    // Create threads for each test
+    test_group_thread_create(test);
+
+    // Start test functions
+    test_group_start(test);
+
+    // Wait until all test functions finish
+    test_group_wait(test);
+
+    // Exit all tests
+    test_group_exit(test);
+
+    // Cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < NUM_X; ii++) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_signal_destroy(z);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_or_atomic.c b/src/core/signals/test_signal_or_atomic.c
new file mode 100644
index 0000000..34c89b7
--- /dev/null
+++ b/src/core/signals/test_signal_or_atomic.c
@@ -0,0 +1,742 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_or_atomic + * + * Purpose: + * Verify atomicity feature of signal operation + * + * Description: + * + * 1) Create a signal, set every bit of the signal to 0, create + * 4 threads + * a) Each thread applies an hsa_signal_or_acquire operation on the signal value. + * b) Thread 0 uses a rotating mask of ...0001, ..0001., .0001.., 0001..., shifting + * the mask 16 bits per application. + * c) Thread 1 uses a rotating mask of ...0010, ..0010., .0010.., 0010..., shifting + * the mask 16 bits per application. + * d) Thread 2 uses a rotating mask of ...0100, ..0100., .0100.., 0100..., shifting + * the mask 16 bits per application. + * e) Thread 3 uses a rotating mask of ...1000, ..1000., .1000.., 1000..., shifting + * the mask 16 bits per application. + * After all threads finish, check that the final value is -1 (all bits set) and repeat OP_COUNT times. + * + * 2) Create a signal, set every bit of the signal to 0, create + * 4 threads + * a) Each thread applies an hsa_signal_or_release operation on the signal value. + * b) Thread 0 uses a rotating mask of ...0001, ..0001., .0001.., 0001..., shifting + * the mask 16 bits per application. + * c) Thread 1 uses a rotating mask of ...0010, ..0010., .0010.., 0010..., shifting + * the mask 16 bits per application. + * d) Thread 2 uses a rotating mask of ...0100, ..0100., .0100.., 0100..., shifting + * the mask 16 bits per application.
+ * e) Thread 3 uses a rotating mask of ...1000, ..1000., .1000.., 1000..., shifting + * the mask 16 bits per application. + * After all threads finish, check that the final value is -1 (all bits set) and repeat OP_COUNT times. + * + * 3) Create a signal, set every bit of the signal to 0, create + * 4 threads + * a) Each thread applies an hsa_signal_or_relaxed operation on the signal value. + * b) Thread 0 uses a rotating mask of ...0001, ..0001., .0001.., 0001..., shifting + * the mask 16 bits per application. + * c) Thread 1 uses a rotating mask of ...0010, ..0010., .0010.., 0010..., shifting + * the mask 16 bits per application. + * d) Thread 2 uses a rotating mask of ...0100, ..0100., .0100.., 0100..., shifting + * the mask 16 bits per application. + * e) Thread 3 uses a rotating mask of ...1000, ..1000., .1000.., 1000..., shifting + * the mask 16 bits per application. + * After all threads finish, check that the final value is -1 (all bits set) and repeat OP_COUNT times. + * + * 4) Create a signal, set every bit of the signal to 0, create + * 4 threads + * a) Each thread applies an hsa_signal_or_acq_rel operation on the signal value. + * b) Thread 0 uses a rotating mask of ...0001, ..0001., .0001.., 0001..., shifting + * the mask 16 bits per application. + * c) Thread 1 uses a rotating mask of ...0010, ..0010., .0010.., 0010..., shifting + * the mask 16 bits per application. + * d) Thread 2 uses a rotating mask of ...0100, ..0100., .0100.., 0100..., shifting + * the mask 16 bits per application. + * e) Thread 3 uses a rotating mask of ...1000, ..1000., .1000.., 1000..., shifting + * the mask 16 bits per application. + * After all threads finish, check that the final value is -1 (all bits set) and repeat OP_COUNT times.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "config.h"
+
+typedef struct test_group test_group;
+
+// Number of worker threads; thread N owns nibble N of every 16-bit group
+#define OR_ATOMIC_NUM_THREADS 4
+
+// Number of 16-bit groups each worker touches: 4 in the large model, 2 otherwise
+#ifdef HSA_LARGE_MODEL
+#define OR_ATOMIC_NUM_GROUPS 4
+#else
+#define OR_ATOMIC_NUM_GROUPS 2
+#endif
+
+// Selects which hsa_signal_or_* variant a worker exercises
+typedef enum {
+    OR_ORDER_ACQUIRE,
+    OR_ORDER_RELEASE,
+    OR_ORDER_RELAXED,
+    OR_ORDER_ACQ_REL
+} or_order;
+
+// Define a structure to pass parameter to child function
+typedef struct {
+    volatile hsa_signal_t signal_handle;  // signal shared by all workers
+    volatile int num;                     // worker index, 0..3
+} param;
+
+// Apply one OR to the signal using the hsa_signal_or_* variant selected by order.
+static void signal_or_with_order(or_order order, hsa_signal_t signal_handle,
+                                 hsa_signal_value_t mask) {
+    switch (order) {
+        case OR_ORDER_ACQUIRE:
+            hsa_signal_or_acquire(signal_handle, mask);
+            break;
+        case OR_ORDER_RELEASE:
+            hsa_signal_or_release(signal_handle, mask);
+            break;
+        case OR_ORDER_RELAXED:
+            hsa_signal_or_relaxed(signal_handle, mask);
+            break;
+        case OR_ORDER_ACQ_REL:
+            hsa_signal_or_acq_rel(signal_handle, mask);
+            break;
+        default:
+            break;
+    }
+}
+
+// OR this worker's nibble (0xf << 4*num) into every 16-bit group of the signal value.
+static void apply_nibble_masks(void* data, or_order order) {
+    // Here, main thread will make sure runtime is open before thread creation and
+    // close after thread func finish properly, within thread, we will not open runtime
+    param* param_ptr = (param*)data;
+    hsa_signal_t signal_handle = param_ptr->signal_handle;
+    int num = param_ptr->num;
+
+    // Worker indexes outside 0..3 do nothing
+    if (num < 0 || num >= OR_ATOMIC_NUM_THREADS) {
+        return;
+    }
+
+    // The mask is built in unsigned arithmetic so that shifting into the top bit
+    // is well defined; it is converted to the signal value type at the call.
+    unsigned long long mask = 0xfULL << (4 * num);
+    int ii;
+    for (ii = 0; ii < OR_ATOMIC_NUM_GROUPS; ++ii) {
+        signal_or_with_order(order, signal_handle, (hsa_signal_value_t)mask);
+        mask <<= 16;
+    }
+}
+
+// Worker body for the hsa_signal_or_acquire test.
+static void child_func_acquire(void* data) {
+    apply_nibble_masks(data, OR_ORDER_ACQUIRE);
+}
+
+// Worker body for the hsa_signal_or_release test.
+static void child_func_release(void* data) {
+    apply_nibble_masks(data, OR_ORDER_RELEASE);
+}
+
+// Worker body for the hsa_signal_or_relaxed test.
+static void child_func_relaxed(void* data) {
+    apply_nibble_masks(data, OR_ORDER_RELAXED);
+}
+
+// Worker body for the hsa_signal_or_acq_rel test.
+static void child_func_acq_rel(void* data) {
+    apply_nibble_masks(data, OR_ORDER_ACQ_REL);
+}
+
+/*
+ * Shared driver for the four test entry points.
+ *
+ * Creates a signal with value 0, runs four workers executing child_func for
+ * OP_COUNT rounds and, after each round, checks that the combined value is -1
+ * before resetting the signal to 0.
+ *
+ * Returns 0 when the function runs to the end; failures are reported through
+ * the ASSERT macros.
+ */
+static int run_signal_or_atomic_test(void (*child_func)(void*)) {
+    hsa_status_t status;
+
+    // Open HsaRt
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    // Set all bits of initial value to 0
+    hsa_signal_value_t initial_value = 0;
+
+    // Create a new signal
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create test group with one slot per worker
+    test_group* group_ptr = NULL;
+    group_ptr = test_group_create(OR_ATOMIC_NUM_THREADS);
+    ASSERT(NULL != group_ptr);
+
+    // Set parameter structure for each thread
+    param* param_ptr = (param*)malloc(sizeof(param) * OR_ATOMIC_NUM_THREADS);
+    ASSERT(NULL != param_ptr);
+    int ii;
+    for (ii = 0; ii < OR_ATOMIC_NUM_THREADS; ++ii) {
+        (param_ptr + ii)->signal_handle = signal_handle;
+        (param_ptr + ii)->num = ii;
+    }
+    // Add tests
+    for (ii = 0; ii < OR_ATOMIC_NUM_THREADS; ++ii) {
+        test_group_add(group_ptr, child_func, param_ptr + ii, 1);
+    }
+    // Create threads for tests
+    test_group_thread_create(group_ptr);
+    // Set run flag to let multi-thread running for OP_COUNT times
+    for (ii = 0; ii < OP_COUNT; ++ii) {
+        test_group_start(group_ptr);
+        test_group_wait(group_ptr);
+        // Here, we can use load acquire or relaxed, but store must be release to make
+        // sure every bit of signal is cleared before next loop
+        hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle);
+        hsa_signal_store_release(signal_handle, 0);
+        ASSERT_MSG(-1 == loaded_value, "Signal value is not -1 which is expected!\n");
+    }
+    // Exit current test group
+    test_group_exit(group_ptr);
+    test_group_destroy(group_ptr);
+    // The per-thread parameters are released only after the test group is destroyed
+    free(param_ptr);
+    // Destroy the resource
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #1
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_signal_or_atomic_acquire() {
+    return run_signal_or_atomic_test(child_func_acquire);
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #2
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_signal_or_atomic_release() {
+    return run_signal_or_atomic_test(child_func_release);
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #3
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_signal_or_atomic_relaxed() {
+    return run_signal_or_atomic_test(child_func_relaxed);
+}
+
+/**
+ *
+ * @Brief:
+ * Implement Description #4
+ *
+ * @Return:
+ * int
+ *
+ */
+
+int test_signal_or_atomic_acq_rel() {
+    return run_signal_or_atomic_test(child_func_acq_rel);
+}
diff --git a/src/core/signals/test_signal_or_release_ordering.c b/src/core/signals/test_signal_or_release_ordering.c
new file mode 100644
index 0000000..d02a941
--- /dev/null
+++ b/src/core/signals/test_signal_or_release_ordering.c
@@ -0,0 +1,182 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_or_release_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_or_release API enforces + * correct memory ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. All the signal values should be initialized to have + * the last bit set. + * 2) Create a control signal, denoted by y, also initialized to have only + * the last bit set. + * 3) Start one thread that + * a) Checks the value of y in a loop using hsa_signal_cas_acquire using + * 0 as the exchange value. + * b) When the value of y has only the last bit set, the thread stops looping, and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting all x values to have their last bit set. + * d) Replaces the x values by using signal_store_relaxed to set them to 0, + * and then uses signal_or_relaxed to set the first bit. + * e) Replaces the value of y by using signal_store_relaxed to set the value to 0, + * and then uses signal_or_release to set the first bit. + * f) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of y is set, not the last. + * 5) Let both threads run for 32K iterations. 
+ * + * Expected Results: For each cycle all memory operations + * that occured in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa. + * + **/ + +#include +#include +#include +#include "config.h" + +typedef struct { + hsa_signal_t* signal_x; + hsa_signal_t signal_y; +} param; + +static void* test_signal_or_release_t1(void* arg) { + param* param_ptr = (param*)arg; + hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = param_ptr->signal_y; + + hsa_signal_value_t value; + int jj; + for (jj = 0; jj < NUM_ITERATION; ++jj) { + while (1 != hsa_signal_cas_acquire(signal_y, 1, 0)); + + int ii; + for (ii = 0; ii < 1024; ++ii) { + value = hsa_signal_load_relaxed(signal_x[ii]); + ASSERT_MSG(1 == value, "The value of signal_x[%d] is not equal to 1!\n", ii); + // Set all bits of each x and mask out all bits but first + hsa_signal_store_relaxed(signal_x[ii], 0); + hsa_signal_or_relaxed(signal_x[ii], FIRST_BIT); + } + hsa_signal_store_relaxed(signal_y, 0); + hsa_signal_or_release(signal_y, FIRST_BIT); + } + return NULL; +} + +static void* test_signal_or_release_t2(void* arg) { + param* param_ptr = (param*)arg; + hsa_signal_t* signal_x = param_ptr->signal_x; + hsa_signal_t signal_y = param_ptr->signal_y; + + hsa_signal_value_t value; + int jj; + for (jj = 0; jj < NUM_ITERATION; ++jj) { + while (FIRST_BIT != hsa_signal_cas_acquire(signal_y, FIRST_BIT, 0)); + + int ii; + for (ii = 0; ii < 1024; ++ii) { + value = hsa_signal_load_relaxed(signal_x[ii]); + ASSERT_MSG(FIRST_BIT == value, "The value of x[%d] is not equal to the min negative value!\n", ii); + // Set all bits of each x and mask out all bits but last + hsa_signal_store_relaxed(signal_x[ii], 0); + hsa_signal_or_relaxed(signal_x[ii], 1); + } + hsa_signal_store_relaxed(signal_y, 0); + hsa_signal_or_release(signal_y, 1); + } + return NULL; +} + +int test_signal_or_release_ordering() { + hsa_status_t status; + status = hsa_init(); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_x[1024], signal_y; + // Set value of signal_x and signal_y to 1 + hsa_signal_value_t initial_value = 1; + int ii; + for (ii = 0; ii < 1024; ++ii) { + status = hsa_signal_create(initial_value, 0, NULL, &signal_x[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_create(initial_value, 0, NULL, &signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Prepare data for threads + param arg[2]; + arg[0].signal_x = signal_x; + arg[0].signal_y = signal_y; + arg[1].signal_x = signal_x; + arg[1].signal_y = signal_y; + + pthread_t id[2]; + pthread_create(&id[0], NULL, test_signal_or_release_t1, &arg[0]); + pthread_create(&id[1], NULL, test_signal_or_release_t2, &arg[1]); + + pthread_join(id[0], NULL); + pthread_join(id[1], NULL); + + // Destroy signal + for (ii = 0; ii < 1024; ++ii) { + status = hsa_signal_destroy(signal_x[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + status = hsa_signal_destroy(signal_y); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_store_release_load_acquire_ordering.c b/src/core/signals/test_signal_store_release_load_acquire_ordering.c new file mode 100644 index 0000000..e92b11c --- /dev/null +++ b/src/core/signals/test_signal_store_release_load_acquire_ordering.c @@ -0,0 +1,177 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_store_release_load_acquire_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_load_acquire and + * hsa_signal_store_release APIs enforce correct memory + * ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to 0. + * 2) Create a control signal, denoted by y, also initialized + * to 0. 
+ * 3) Start one thread that + * a) Check the value of y in a loop using hsa_load_acquire. + * b) When the value of y is 0, the thread stops looping and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 0. + * d) Changes all of the x signal values to 1 using the + * signal_store_relaxed API. + * e) Sets the value of y to 1, using the signal_store_release API. + * f) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 0 in place of 1 and 1 in place of 0. + * 5) Let both threads run for millions of iterations. + * + * Expected Results: For each cycle, the x values for the first thread should be 1 + * and the x values for the second thread should be 0, i.e. all memory operations + * that occured in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa. + */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t x[NUM_X]; + +void test_signal_store_release_load_acquire_t1(void *data) { + int ii, jj; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // check if y equals to 0 + while (hsa_signal_load_acquire(y) != 0); + + for (jj = 0; jj < NUM_X; ++jj) { + // every x value should equal to 0 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[jj]); + ASSERT(sig_val_t1 == 0); + + // change x value to 1 + hsa_signal_store_relaxed(x[jj], 1); + } + + // change y to 1 + hsa_signal_store_release(y, 1); + } + return; +} + +void test_signal_store_release_load_acquire_t2(void *data) { + int ii, jj; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // check if y equals to 1 + while (hsa_signal_load_acquire(y) != 1); + + for (jj = 0; jj < NUM_X; ++jj) { + // every x value should equal to 1 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[jj]); + ASSERT(sig_val_t1 == 1); + + // change x value to 0 + hsa_signal_store_relaxed(x[jj], 0); + } + + // change y to 0 + 
hsa_signal_store_release(y, 0); + } + return; +} + +int test_signal_store_release_load_acquire_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize every x value to 1 + status = hsa_signal_create(1, 0, NULL, &x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y to 1 + status = hsa_signal_create(1, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + struct test_group *tg_t1 = test_group_create(1); + test_group_add(tg_t1, test_signal_store_release_load_acquire_t1, NULL, 1); + test_group_thread_create(tg_t1); + + struct test_group *tg_t2 = test_group_create(1); + test_group_add(tg_t2, test_signal_store_release_load_acquire_t2, NULL, 1); + test_group_thread_create(tg_t2); + + test_group_start(tg_t1); + test_group_start(tg_t2); + + test_group_wait(tg_t1); + test_group_wait(tg_t2); + + test_group_exit(tg_t1); + test_group_exit(tg_t2); + + test_group_destroy(tg_t1); + test_group_destroy(tg_t2); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + + return 0; +} diff --git a/src/core/signals/test_signal_store_release_load_acquire_ordering_transitive.c b/src/core/signals/test_signal_store_release_load_acquire_ordering_transitive.c new file mode 100644 index 0000000..ef0fd45 --- /dev/null +++ b/src/core/signals/test_signal_store_release_load_acquire_ordering_transitive.c @@ -0,0 +1,237 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. 
+ * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_store_release_load_acquire_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_load_acquire and + * hsa_signal_store_release APIs enforce correct memory + * ordering. 
+ * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to 0. + * 2) Create two control signals, denoted by y and z. The initial + * y signal value should be 0 and the z signal value should be 0. + * 3) Start one thread that + * a) Checks the value of y in a loop using signal_load_acquire. + * b) When the value of y is 0, the thread stops looping and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 0. + * d) Changes all of the x signal values to 1 using the + * signal_store_relaxed API. + * e) Sets the value of y to 1, using the signal_store_release API. + * f) Starts over. + * g) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 0 in place of 1 and 1 in place of 0, and operates on signal z + * instead of signal y. + * 5) Start a third thread that + * a) Loops on the value of y using signal_load_acquire to get the value. + * b) When the value of y is 1, the thread stops looping and + * c) Sets the value of z to 1 using signal_store_release. + * d) Loops on the value of z using signal_load_acquire to get the value. + * e) When the value of z is 0, the thread stops looping and + * f) Sets the value of y to 0 and starts over. + * g) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 6) Let all three threads run for millions of iterations. + * + * Expected Results: For each cycle, the x values observed by the first thread should be 0 + * and the x values observed by the second thread should be 1, i.e. all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; + +void test_signal_store_release_load_acquire_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // loop till y = 0 or y = -1 + while ((y_val = hsa_signal_load_acquire(y)) != 0) { + if (y_val == -1) { + return; + } + } + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal to 0 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == 0, "signal value should be 0, but is %d\n", sig_val_t1); + + // change x value to 1 + hsa_signal_store_relaxed(x[ii], 1); + } + + // set y to 1 + hsa_signal_store_release(y, 1); + } + return; +} + + +void test_signal_store_release_load_acquire_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // loop till z = 0 or z = -1 + while ((z_val = hsa_signal_load_acquire(z)) != 1) { + if (z_val == -1) { + return; + } + } + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal to 1 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == 1, "signal value should be 0, but is %d\n", sig_val_t1); + + // change x value to 1 + hsa_signal_store_relaxed(x[ii], 0); + } + + // set z to 1 + hsa_signal_store_release(z, 0); + } + return; +} + +void test_signal_store_release_load_acquire_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // loop until y = 1 + while (hsa_signal_load_acquire(y) != 1); + + // set z to 0 + hsa_signal_store_release(z, 1); + + // loop until z = 1 + while (hsa_signal_load_acquire(z) != 0); + + // set y to 0 + hsa_signal_store_release(y, 0); + } + + // set y to -1 until t1 finish to avoid deadlock + while (hsa_signal_load_relaxed(y) != 1); + hsa_signal_store_release(y, -1); + hsa_signal_store_release(z, -1); + + return; +} + +int test_signal_store_release_load_acquire_ordering_transitive() { + hsa_status_t status; + 
status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize all x values to 2 + status = hsa_signal_create(0, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y to 0 + status = hsa_signal_create(0, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // initialize z to 1 + status = hsa_signal_create(0, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(3); + + // add test func one to the test group + test_group_add(test, test_signal_store_release_load_acquire_ordering_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_store_release_load_acquire_ordering_t2, NULL, 1); + + // add test func three to the test_group + test_group_add(test, test_signal_store_release_load_acquire_ordering_t3, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait all tests functions finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_signal_destroy(z); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_subtract_acq_rel_ordering.c b/src/core/signals/test_signal_subtract_acq_rel_ordering.c new file mode 100644 index 0000000..d85cb65 --- /dev/null +++ b/src/core/signals/test_signal_subtract_acq_rel_ordering.c @@ -0,0 +1,187 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: signal_subtract_acq_rel_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_subtract_acq_rel API enforces + * correct memory ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized to 2. + * 2) Create a control signal, denoted by y, initialized to -1. + * 3) Start one thread that + * a) Checks the value of y in a loop using hsa_signal_cas_acq_rel using + * 0 as the exchange value. + * b) When the value of y is -1, the exchange sets it to 0 and the thread stops looping, and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 2, and replacing it with a value of 1 by + * calling signal_subtract_relaxed to subtract 1 from the value. + * d) Sets the value of y to -2, using the signal_subtract_acq_rel API + * and a subtract value of 2. + * e) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * except that it waits for a value of -2, expects the x signal values to be 1, + * changes the x signal values by subtracting -1 and sets the value of y to -1 + * by subtracting 1. + * 5) Let both threads run for millions of iterations. + * + * Expected Results: For each cycle, the reported x values for the first thread should be 2 + * and the reported x values for the second thread should be 1, i.e. all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa. 
+*/ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t x[NUM_X]; + +// test func one: +// check y, if y equal to -1, set y to 0, and then check if all x values equal to 2 and set them to 1, and then set y value to -2 +void test_signal_subtract_acq_rel_t1(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // change y to 0 if y equals to -1 + while (hsa_signal_cas_acq_rel(y, -1, 0) != -1); + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal to 2 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT(sig_val_t1 == 2); + + // change x value to 1 + hsa_signal_subtract_relaxed(x[ii], 1); + } + + // change y to -2 + hsa_signal_subtract_acq_rel(y, 2); + } + return; +} + + +// test func two: +// check y, if y equal to -2, set y to 0, and then check if all x values equal to 2 and set them to 1, and then set y value to -1 +void test_signal_subtract_acq_rel_t2(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // change y to 0 if y equals to -2 + while (hsa_signal_cas_acq_rel(y, -2, 0) != -2); + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal to 1 + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT(sig_val_t2 == 1); + + // change x value to 2 + hsa_signal_subtract_relaxed(x[ii], -1); + } + + // change y to -1 + hsa_signal_subtract_acq_rel(y, 1); + } + return; +} + +int test_signal_subtract_acq_rel_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize all x values to 2 + status = hsa_signal_create(2, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y to 1 + status = hsa_signal_create(-1, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = 
test_group_create(2); + + // add test func one to the test group + test_group_add(test, test_signal_subtract_acq_rel_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_subtract_acq_rel_t2, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait all tests functions finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_subtract_acq_rel_ordering_transitive.c b/src/core/signals/test_signal_subtract_acq_rel_ordering_transitive.c new file mode 100644 index 0000000..32bc4d9 --- /dev/null +++ b/src/core/signals/test_signal_subtract_acq_rel_ordering_transitive.c @@ -0,0 +1,236 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_subtract_acq_rel_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_subtract_acq_rel API + * enforces transitive memory ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized + * to 2. + * 2) Create two control signals, denoted by y and z. 
The initial + * y signal value should be 1 and the z signal value should be 1. + * 3) Start one thread that + * a) Uses signal_load_relaxed to load the value of y in a loop, stopping + * when the value is 1. + * b) Uses signal_subtract_acq_rel to decrement the value of y to 0. + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 2. + * d) Changes all of the x signal values to 1 using the signal_subtract_relaxed + * API by subtracting 1 from the value. + * e) Sets the value of y to 2, using the signal_subtract_acq_rel API to subtract + * -2 from the signal value. + * f) Starts over. + * g) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 1 in place of 2 and 2 in place of 1, and operates on signal z + * instead of signal y. + * 5) Start a third thread that + * a) Waits until y is 2 using signal_wait_acquire. + * b) Sets the value of z to 2 using signal_cas_release with + * 1 as the condition. + * c) Waits until z is 1 using signal_wait_acquire. + * d) Sets the value of y to 1 using signal_cas_release with + * 2 as the condition. + * e) Starts over. + * f) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 6) Let all three threads run for thousands of iterations. + * + * Expected Results: For each cycle all memory operations that occurred in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; + +void test_signal_subtract_acq_rel_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // Loop until y = 1 or y = -1 + while ((y_val = hsa_signal_load_relaxed(y)) != 1) + if (y_val == -1) return; + + hsa_signal_subtract_acq_rel(y, 1); + + for (ii = 0; ii < NUM_X; ++ii) { + // Every x value should equal to 0 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == 2, "signal value should be 2, but is %d\n", sig_val_t1); + + // Change x value to 1 + hsa_signal_subtract_relaxed(x[ii], 1); + } + + // Set y to 2 + hsa_signal_subtract_acq_rel(y, -2); + } + return; +} + + +void test_signal_subtract_acq_rel_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // Loop until z = 2 or z = -1 + while ((z_val = hsa_signal_load_relaxed(z)) != 2) + if (z_val == -1) return; + + hsa_signal_subtract_acq_rel(z, 2); + + for (ii = 0; ii < NUM_X; ++ii) { + // Every x value should equal to 1 + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == 1, "signal value should be 1, but is %d\n", sig_val_t2); + + // Change x value to 1 + hsa_signal_subtract_relaxed(x[ii], -1); + } + + // Set z to 1 + hsa_signal_subtract_acq_rel(z, -1); + } + return; +} + + +void test_signal_subtract_acq_rel_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // Wait until y = 1 + hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, 2, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Set z to 0 + hsa_signal_cas_release(z, 1, 2); + + // Loop until z = 1 + hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Set y to 0 + hsa_signal_cas_release(y, 2, 1); + } + + // Set y to 2 until t1 finish to avoid deadlock + while (hsa_signal_load_relaxed(y) != 2); + hsa_signal_store_release(y, -1); + 
hsa_signal_store_release(z, -1); + + return; +} + +int test_signal_subtract_acq_rel_ordering_transitive() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // Initialize all x values to 2 + status = hsa_signal_create(2, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // Initialize y to 0 + status = hsa_signal_create(1, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Initialize z to 1 + status = hsa_signal_create(1, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Create test_group + struct test_group *test = test_group_create(3); + + // Add test func one to the test group + test_group_add(test, test_signal_subtract_acq_rel_ordering_t1, NULL, 1); + + // Add test func two to the test_group + test_group_add(test, test_signal_subtract_acq_rel_ordering_t2, NULL, 1); + + // Add test func three to the test_group + test_group_add(test, test_signal_subtract_acq_rel_ordering_t3, NULL, 1); + + // Create threads for each test + test_group_thread_create(test); + + // Start test functions + test_group_start(test); + + // Wait all tests functions finish + test_group_wait(test); + + // Exit all tests + test_group_exit(test); + + // Cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_signal_destroy(z); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_subtract_acquire_ordering.c b/src/core/signals/test_signal_subtract_acquire_ordering.c new file mode 100644 index 0000000..1385c87 --- /dev/null +++ b/src/core/signals/test_signal_subtract_acquire_ordering.c @@ -0,0 +1,187 @@ +/* + * 
============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: signal_subtract_acquire_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_subtract_acquire API enforces + * correct memory ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. All the signal values should be initialized to 2. + * 2) Create a control signal, denoted by y, initialized to 1. + * 3) Start one thread that + * a) Checks the value of y in a loop using signal_cas_acq_rel, with + * 1 as the expected value and 0 as the exchange value. + * b) When the observed value of y is 1 the exchange succeeds, the thread stops looping, and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 2, and replacing it with a value of 1 by + * calling signal_subtract_relaxed to subtract 1 from the value. + * d) Sets the value of y to -2, using the signal_subtract_acquire API + * and a subtract value of 2. + * e) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * except that it waits for a value of -2, expects the x signal values to be 1, + * changes the x signal values by subtracting -1 and sets the value of y back to 1 + * by subtracting -1. + * 5) Let both threads run for millions of iterations. + * + * Expected Results: For each cycle, the reported x values for the first thread should be 2 + * and the reported x values for the second thread should be 1, i.e. all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t x[1024]; + +// test func one: +// check y, if y equal to -1, set y to 0, and then check if all x values equal to 2 and set them to 1, and then set y value to -2 +void test_signal_subtract_acquire_t1(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // change y to 0 if y equals to 1 + while (hsa_signal_cas_acq_rel(y, 1, 0) != 1); + + for (ii = 0; ii < 1024; ++ii) { + // every x value should equal to 2 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT(sig_val_t1 == 2); + + // change x value to 1 + hsa_signal_subtract_relaxed(x[ii], 1); + } + + // change y to -2 + hsa_signal_subtract_acquire(y, 2); + } + return; +} + + +// test func two: +// check y, if y equal to -2, set y to 0, and then check if all x values equal to 2 and set them to 1, and then set y value to -1 +void test_signal_subtract_acquire_t2(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // change y to 0 if y equals to -2 + while (hsa_signal_cas_acq_rel(y, -2, 0) != -2); + + for (ii = 0; ii < 1024; ++ii) { + // every x value should equal to 1 + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT(sig_val_t2 == 1); + + // change x value to 2 + hsa_signal_subtract_relaxed(x[ii], -1); + } + + // change y to -1 + hsa_signal_subtract_acquire(y, 1); + } + return; +} + +int test_signal_subtract_acquire_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < 1024; ++ii) { + // initialize all x values to 1 + status = hsa_signal_create(2, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y to 1 + status = hsa_signal_create(1, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(2); + + 
// add test func one to the test group + test_group_add(test, test_signal_subtract_acquire_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_subtract_acquire_t2, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait all tests functions finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < 1024; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_subtract_acquire_release_ordering_transitive.c b/src/core/signals/test_signal_subtract_acquire_release_ordering_transitive.c new file mode 100644 index 0000000..6bdeb4b --- /dev/null +++ b/src/core/signals/test_signal_subtract_acquire_release_ordering_transitive.c @@ -0,0 +1,235 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_subtract_acquire_release_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_subtract_acquire and + * hsa_signal_subtract_release APIs enforce transitive memory + * ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. All the signal values should be initialized + * to 2. + * 2) Create two control signals, denoted by y and z. 
The initial + * y and z signal values should both be 1. + * 3) Start one thread that + * a) Uses signal_load_relaxed to load the value of y in a loop, stopping + * when the value is 1. + * b) Uses signal_subtract_acquire to decrement the value of y to 0. + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 2. + * d) Changes all of the x signal values to 1 using the signal_subtract_relaxed + * API by subtracting 1 from the value. + * e) Sets the value of y to 2, using the signal_subtract_release API to subtract + * -2 from the signal value. + * f) Starts over. + * g) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but uses 1 in place of 2 and 2 in place of 1, and operates on signal z + * instead of signal y. + * 5) Start a third thread that + * a) Waits until y is 2 using signal_wait_acquire. + * b) Sets the value of z to 2 using signal_cas_release with + * 1 as the condition. + * c) Waits until z is 1 using signal_wait_acquire. + * d) Sets the value of y to 1 using signal_cas_release with + * 2 as the condition. + * e) Starts over. + * f) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 6) Let all three threads run for thousands of iterations. + * + * Expected Results: For each cycle all memory operations that occurred in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; + +void test_signal_subtract_acquire_release_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // Loop until y = 1 or y = -1 + while ((y_val = hsa_signal_load_relaxed(y)) != 1) { + if (y_val == -1) return; + } + + hsa_signal_subtract_acquire(y, 1); + + for (ii = 0; ii < NUM_X; ii++) { + // Every x value should equal to 0 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == 2, "signal value should be 2, but is %d\n", sig_val_t1); + + // Change x value to 1 + hsa_signal_subtract_relaxed(x[ii], 1); + } + + // Set y to 2 + hsa_signal_subtract_release(y, -2); + } +} + + +void test_signal_subtract_acquire_release_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // Loop until z = 2 or z = -1 + while ((z_val = hsa_signal_load_relaxed(z)) != 2) { + if (z_val == -1) return; + } + + hsa_signal_subtract_acquire(z, 2); + + for (ii = 0; ii < NUM_X; ii++) { + // Every x value should equal to 1 + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == 1, "signal value should be 1, but is %d\n", sig_val_t2); + + // Change x value to 1 + hsa_signal_subtract_relaxed(x[ii], -1); + } + + // Set z to 1 + hsa_signal_subtract_release(z, -1); + } +} + + +void test_signal_subtract_acquire_release_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ii++) { + // Wait until y = 1 + hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, 2, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Set z to 0 + hsa_signal_cas_release(z, 1, 2); + + // Loop until z = 1 + hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Set y to 0 + hsa_signal_cas_release(y, 2, 1); + } + + // Set y to 2 until t1 finish to avoid deadlock + while (hsa_signal_load_relaxed(y) != 2); + 
hsa_signal_store_release(y, -1); + hsa_signal_store_release(z, -1); +} + +int test_signal_subtract_acquire_release_ordering_transitive() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ii++) { + // Initialize all x values to 2 + status = hsa_signal_create(2, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // Initialize y to 0 + status = hsa_signal_create(1, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Initialize z to 1 + status = hsa_signal_create(1, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // Create test_group + struct test_group *test = test_group_create(3); + + // Add test func one to the test group + test_group_add(test, test_signal_subtract_acquire_release_ordering_t1, NULL, 1); + + // Add test func two to the test_group + test_group_add(test, test_signal_subtract_acquire_release_ordering_t2, NULL, 1); + + // Add test func three to the test_group + test_group_add(test, test_signal_subtract_acquire_release_ordering_t3, NULL, 1); + + // Create threads for each test + test_group_thread_create(test); + + // Start test functions + test_group_start(test); + + // Wait all tests functions finish + test_group_wait(test); + + // Exit all tests + test_group_exit(test); + + // Cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < NUM_X; ii++) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_signal_destroy(z); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_subtract_atomic.c b/src/core/signals/test_signal_subtract_atomic.c new file mode 100644 index 0000000..6da029a --- /dev/null +++ b/src/core/signals/test_signal_subtract_atomic.c @@ -0,0 +1,366 @@ +/* + * 
============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/** + * + * Test Name: signal_subtract_atomic + * + * Purpose: + * Verify atomicity feature of signal operation + * + * Description: + * + * 1) Create a signal, assigning it an initial value of GROUP_SIZE * OP_COUNT. + * Create 4 threads, which call hsa_signal_subtract_acquire + * in a loop to subtract 1 from the signal OP_COUNT times. + * After the threads finish, check that the value is 0, and + * repeat this process several times. + * + * 2) Create a signal, assigning it an initial value of GROUP_SIZE * OP_COUNT. + * Create 4 threads, which call hsa_signal_subtract_release + * in a loop to subtract 1 from the signal OP_COUNT times. + * After the threads finish, check that the value is 0, and + * repeat this process several times. + * + * 3) Create a signal, assigning it an initial value of GROUP_SIZE * OP_COUNT. + * Create 4 threads, which call hsa_signal_subtract_relaxed + * in a loop to subtract 1 from the signal OP_COUNT times. + * After the threads finish, check that the value is 0, and + * repeat this process several times. + * + * 4) Create a signal, assigning it an initial value of GROUP_SIZE * OP_COUNT. + * Create 4 threads, which call hsa_signal_subtract_acq_rel + * in a loop to subtract 1 from the signal OP_COUNT times. + * After the threads finish, check that the value is 0, and + * repeat this process several times. 
+ * + */ + +#include +#include +#include +#include "config.h" + +typedef struct test_group test_group; + +static void child_func_acquire(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + hsa_signal_t* signal_handle = (hsa_signal_t*)data; + + // Call hsa_signal_subtract_acquire in a loop + int ii; + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_subtract_acquire(*signal_handle, 1); + } + return; +} + +static void child_func_release(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + hsa_signal_t* signal_handle = (hsa_signal_t*)data; + + // Call hsa_signal_subtract_acquire in a loop + int ii; + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_subtract_release(*signal_handle, 1); + } + return; +} + +static void child_func_relaxed(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + hsa_signal_t* signal_handle = (hsa_signal_t*)data; + + // Call hsa_signal_subtract_acquire in a loop + int ii; + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_subtract_relaxed(*signal_handle, 1); + } + return; +} + +static void child_func_acq_rel(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + hsa_signal_t* signal_handle = (hsa_signal_t*)data; + + // Call hsa_signal_subtract_acquire in a loop + int ii; + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_subtract_acq_rel(*signal_handle, 1); + } + return; +} + +/** + * + * @Brief: + * Implement Description #1 + * + * @Return: + * int + * + */ + +int test_signal_subtract_atomic_acquire() { + hsa_status_t status; + + // 
Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = GROUP_SIZE*OP_COUNT; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of GROUP_SIZE + test_group* group_ptr = NULL; + group_ptr = test_group_create(GROUP_SIZE); + ASSERT(NULL != group_ptr); + // Add tests + test_group_add(group_ptr, child_func_acquire, &signal_handle, 4); + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running for 10 times + int ii; + for (ii = 0; ii < TEST_COUNT; ++ii) { + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG((GROUP_SIZE * OP_COUNT) == loaded_value, "Signal value is not GROUP_SIZE * OP_COUNT which is expected!\n"); + test_group_start(group_ptr); + test_group_wait(group_ptr); + // Here, we can use load acquire or relaxed, but store must be release to make sure signal is set to GROUP_SIZE*OP_COUNT + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(0 == loaded_value, "Signal value is not 0 which is expected!\n"); + hsa_signal_store_release(signal_handle, GROUP_SIZE*OP_COUNT); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return: + * int + * + */ + +int test_signal_subtract_atomic_release() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = GROUP_SIZE*OP_COUNT; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, 
NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of GROUP_SIZE + test_group* group_ptr = NULL; + group_ptr = test_group_create(GROUP_SIZE); + ASSERT(NULL != group_ptr); + // Add tests + test_group_add(group_ptr, child_func_release, &signal_handle, 4); + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running for 10 times + int ii; + for (ii = 0; ii < TEST_COUNT; ++ii) { + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG((GROUP_SIZE * OP_COUNT) == loaded_value, "Signal value is not GROUP_SIZE * OP_COUNT which is expected!\n"); + test_group_start(group_ptr); + test_group_wait(group_ptr); + // Here, we can use load acquire or relaxed, but store must be release to make sure signal is set to GROUP_SIZE*OP_COUNT + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(0 == loaded_value, "Signal value is not 0 which is expected!\n"); + hsa_signal_store_release(signal_handle, GROUP_SIZE*OP_COUNT); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #3 + * + * @Return: + * int + * + */ + +int test_signal_subtract_atomic_relaxed() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = GROUP_SIZE*OP_COUNT; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of GROUP_SIZE + test_group* group_ptr = NULL; + group_ptr = test_group_create(GROUP_SIZE); + ASSERT(NULL != group_ptr); + // Add tests + 
test_group_add(group_ptr, child_func_relaxed, &signal_handle, 4); + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running for 10 times + int ii; + for (ii = 0; ii < TEST_COUNT; ++ii) { + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG((GROUP_SIZE * OP_COUNT) == loaded_value, "Signal value is not GROUP_SIZE * OP_COUNT which is expected!\n"); + test_group_start(group_ptr); + test_group_wait(group_ptr); + // Here, we can use load acquire or relaxed, but store must be release to make sure signal is set to GROUP_SIZE*OP_COUNT + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(0 == loaded_value, "Signal value is not 0 which is expected!\n"); + hsa_signal_store_release(signal_handle, GROUP_SIZE*OP_COUNT); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #4 + * + * @Return: + * int + * + */ + +int test_signal_subtract_atomic_acq_rel() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = GROUP_SIZE*OP_COUNT; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of GROUP_SIZE + test_group* group_ptr = NULL; + group_ptr = test_group_create(GROUP_SIZE); + ASSERT(NULL != group_ptr); + // Add tests + test_group_add(group_ptr, child_func_acq_rel, &signal_handle, 4); + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running for 10 times + int ii; + for (ii = 0; ii < TEST_COUNT; ++ii) 
{ + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG((GROUP_SIZE * OP_COUNT) == loaded_value, "Signal value is not GROUP_SIZE * OP_COUNT which is expected!\n"); + test_group_start(group_ptr); + test_group_wait(group_ptr); + // Here, we can use load acquire or relaxed, but store must be release to make sure signal is set to GROUP_SIZE*OP_COUNT + loaded_value = hsa_signal_load_relaxed(signal_handle); + ASSERT_MSG(0 == loaded_value, "Signal value is not 0 which is expected!\n"); + hsa_signal_store_release(signal_handle, GROUP_SIZE*OP_COUNT); + } + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_subtract_release_ordering.c b/src/core/signals/test_signal_subtract_release_ordering.c new file mode 100644 index 0000000..6aa5db6 --- /dev/null +++ b/src/core/signals/test_signal_subtract_release_ordering.c @@ -0,0 +1,187 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + + /* + * Test Name: signal_subtract_release_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_subtract_release API enforces + * correct memory ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. All the signal values should be initialized to 2. + * 2) Create a control signal, denoted by y, initialized to 1. 
+ * 3) Start one thread that + * a) Checks the value of y in a loop using signal_cas_acquire, with + * 1 as the expected value and 0 as the exchange value. + * b) When the observed value of y is 1 the exchange succeeds, the thread stops looping, and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting a value of 2, and replacing it with a value of 1 by + * calling signal_subtract_relaxed to subtract 1 from the value. + * d) Sets the value of y to -2, using the signal_subtract_release API + * and a subtract value of 2. + * e) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * except that it waits for a value of -2, expects the x signal values to be 1, + * changes the x signal values by subtracting -1 and sets the value of y back to 1 + * by subtracting -1. + * 5) Let both threads run for millions of iterations. + * + * Expected Results: For each cycle, the reported x values for the first thread should be 2 + * and the reported x values for the second thread should be 1, i.e. all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa. 
+ */
+
+#include
+#include
+#include
+#include "config.h"
+
+// Handshake signal: the two test functions hand control to each other
+// through it (acquire CAS on entry, release subtract on exit).
+hsa_signal_t y;
+// Signals that are read and updated with relaxed operations between the
+// acquire and release operations on y.
+hsa_signal_t x[1024];
+
+// test func one:
+// check y, if y equal to -1, set y to 0, and then check if all x values equal to 2 and set them to 1, and then set y value to -2
+// data is unused.
+void test_signal_subtract_release_t1(void *data) {
+    int ii, jj;
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) {
+        // change y to 0 if y equals to -1; spin until the exchange succeeds.
+        // -1 is both the initial value of y and the value func two leaves
+        // behind, so this wait is satisfiable on every iteration.
+        while (hsa_signal_cas_acquire(y, -1, 0) != -1);
+
+        for (ii = 0; ii < 1024; ++ii) {
+            // every x value should equal to 2
+            hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT(sig_val_t1 == 2);
+
+            // change x value to 1
+            hsa_signal_subtract_relaxed(x[ii], 1);
+        }
+
+        // change y from 0 to -2, which lets func two proceed
+        hsa_signal_subtract_release(y, 2);
+    }
+    return;
+}
+
+
+// test func two:
+// check y, if y equal to -2, set y to 0, and then check if all x values equal to 1 and set them to 2, and then set y value to -1
+// data is unused.
+void test_signal_subtract_release_t2(void *data) {
+    int ii, jj;
+    // repeat NUM_ITER_MEM_ORD times
+    for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) {
+        // change y to 0 if y equals to -2; spin until the exchange succeeds
+        while (hsa_signal_cas_acquire(y, -2, 0) != -2);
+
+        for (ii = 0; ii < 1024; ++ii) {
+            // every x value should equal to 1
+            hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]);
+            ASSERT(sig_val_t2 == 1);
+
+            // change x value back to 2 (subtracting -1 adds 1) so that the
+            // check in func one holds again on its next iteration
+            hsa_signal_subtract_relaxed(x[ii], -1);
+        }
+
+        // change y from 0 to -1, which lets func one proceed
+        hsa_signal_subtract_release(y, 1);
+    }
+    return;
+}
+
+// Test entry point: creates the signals, runs test func one and test func two
+// concurrently in a two-member test group, then releases every resource.
+// Returns 0.
+int test_signal_subtract_release_ordering() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    int ii;
+    for (ii = 0; ii < 1024; ++ii) {
+        // initialize all x values to 2
+        status = hsa_signal_create(2, 0, NULL, x + ii);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    // initialize y to -1 so that test func one runs first
+    status = hsa_signal_create(-1, 0, NULL, &y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // create test_group
+    struct test_group *test = test_group_create(2);
+
+    // add test func one to the test group
+    test_group_add(test, test_signal_subtract_release_t1, NULL, 1);
+
+    // add test func two to the test_group
+    test_group_add(test, test_signal_subtract_release_t2, NULL, 1);
+
+    // create threads for each test
+    test_group_thread_create(test);
+
+    // start test functions
+    test_group_start(test);
+
+    // wait all tests functions finish
+    test_group_wait(test);
+
+    // exit all tests
+    test_group_exit(test);
+
+    // cleanup resources
+    test_group_destroy(test);
+
+    for (ii = 0; ii < 1024; ++ii) {
+        status = hsa_signal_destroy(x[ii]);
+        ASSERT(status == HSA_STATUS_SUCCESS);
+    }
+
+    status = hsa_signal_destroy(y);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_update_basic.c b/src/core/signals/test_signal_update_basic.c
new file mode 100644
index 0000000..2a1e207
--- /dev/null
+++ b/src/core/signals/test_signal_update_basic.c
@@ -0,0 +1,326 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_update_basic + * Scope: Conformance + * + * Purpose: Verifies that all of the signal modification APIs are functional + * in a single thread. + * + * Test Description: + * 1) Create a signal. + * 2) For each of the signal modification APIs + * a) Change the signal value to a new value. 
+ *      b) Read back the signal value with hsa_signal_load
+ *   3) The modification APIs under test are:
+ *      - hsa_signal_store_(release|relaxed)
+ *      - hsa_signal_exchange_(acq_rel|acquire|relaxed|release)
+ *      - hsa_signal_cas_(acq_rel|acquire|relaxed|release)
+ *      - hsa_signal_add_(acq_rel|acquire|relaxed|release)
+ *      - hsa_signal_subtract_(acq_rel|acquire|relaxed|release)
+ *      - hsa_signal_and_(acq_rel|acquire|relaxed|release)
+ *      - hsa_signal_or_(acq_rel|acquire|relaxed|release)
+ *      - hsa_signal_xor_(acq_rel|acquire|relaxed|release)
+ *
+ * Expected Results: The hsa_signal_load should return the expected value
+ * after each modification.
+ *
+ */
+
+#include
+#include
+
+// Stores 1 with hsa_signal_store_release and 0 with hsa_signal_store_relaxed
+// into a signal created with value 0, reading the value back after each store.
+void hsa_signal_store_update_basic() {
+    hsa_signal_t signal;
+    hsa_status_t status = hsa_signal_create(0, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t value;
+
+    hsa_signal_store_release(signal, 1);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(1 == value);
+
+    hsa_signal_store_relaxed(signal, 0);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0 == value);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
+
+// Toggles a signal between 0 and 1 with each hsa_signal_exchange_* variant.
+// Every exchange must return the previous value, and the following load must
+// return the newly written value.
+void hsa_signal_exchange_update_basic() {
+    hsa_signal_t signal;
+    hsa_status_t status = hsa_signal_create(0, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t value;
+
+    value = hsa_signal_exchange_acquire(signal, 1);
+    ASSERT(0 == value);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(1 == value);
+
+    value = hsa_signal_exchange_acq_rel(signal, 0);
+    ASSERT(1 == value);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0 == value);
+
+    value = hsa_signal_exchange_release(signal, 1);
+    ASSERT(0 == value);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(1 == value);
+
+    value = hsa_signal_exchange_relaxed(signal, 0);
+    ASSERT(1 == value);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0 == value);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
+
+// Toggles a signal between 0 and 1 with each hsa_signal_cas_* variant. The
+// expected value always matches the current value, so every call must return
+// the previous value and the following load must return the new one.
+void hsa_signal_cas_update_basic() {
+    hsa_signal_t signal;
+    hsa_status_t status = hsa_signal_create(0, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t value;
+
+    value = hsa_signal_cas_acquire(signal, 0, 1);
+    ASSERT(0 == value);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(1 == value);
+
+    value = hsa_signal_cas_acq_rel(signal, 1, 0);
+    ASSERT(1 == value);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0 == value);
+
+    value = hsa_signal_cas_release(signal, 0, 1);
+    ASSERT(0 == value);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(1 == value);
+
+    value = hsa_signal_cas_relaxed(signal, 1, 0);
+    ASSERT(1 == value);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0 == value);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
+
+// Adds 1 to a signal created with value 0 using each hsa_signal_add_* variant
+// and checks the running total (1, 2, 3, 4) after every call.
+void hsa_signal_add_update_basic() {
+    hsa_signal_t signal;
+    hsa_status_t status = hsa_signal_create(0, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t value;
+
+    hsa_signal_add_acquire(signal, 1);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(1 == value);
+
+    hsa_signal_add_acq_rel(signal, 1);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(2 == value);
+
+    hsa_signal_add_release(signal, 1);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(3 == value);
+
+    hsa_signal_add_relaxed(signal, 1);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(4 == value);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
+
+// Subtracts 1 from a signal created with value 4 using each
+// hsa_signal_subtract_* variant and checks the result (3, 2, 1, 0) after
+// every call.
+void hsa_signal_subtract_update_basic() {
+    hsa_signal_t signal;
+    hsa_status_t status = hsa_signal_create(4, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t value;
+
+    hsa_signal_subtract_acquire(signal, 1);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(3 == value);
+
+    hsa_signal_subtract_acq_rel(signal, 1);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(2 == value);
+
+    hsa_signal_subtract_release(signal, 1);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(1 == value);
+
+    hsa_signal_subtract_relaxed(signal, 1);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0 == value);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
+
+// Starts from 0x1111 and clears one hex digit per call with each
+// hsa_signal_and_* variant, checking the value after every call.
+void hsa_signal_and_update_basic() {
+    hsa_signal_t signal;
+    hsa_status_t status = hsa_signal_create(0x1111, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t value;
+
+    hsa_signal_and_acquire(signal, 0x0111);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0111 == value);
+
+    hsa_signal_and_acq_rel(signal, 0x0011);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0011 == value);
+
+    hsa_signal_and_release(signal, 0x0001);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0001 == value);
+
+    hsa_signal_and_relaxed(signal, 0x0000);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0000 == value);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
+
+// Starts from 0x0000 and sets one hex digit per call with each
+// hsa_signal_or_* variant, checking the value after every call.
+void hsa_signal_or_update_basic() {
+    hsa_signal_t signal;
+    hsa_status_t status = hsa_signal_create(0x0000, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t value;
+
+    hsa_signal_or_acquire(signal, 0x0001);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0001 == value);
+
+    hsa_signal_or_acq_rel(signal, 0x0010);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0011 == value);
+
+    hsa_signal_or_release(signal, 0x0100);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0111 == value);
+
+    hsa_signal_or_relaxed(signal, 0x1000);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x1111 == value);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
+
+// Starts from 0x0000 and flips one previously clear hex digit per call with
+// each hsa_signal_xor_* variant, checking the value after every call.
+void hsa_signal_xor_update_basic() {
+    hsa_signal_t signal;
+    hsa_status_t status = hsa_signal_create(0x0000, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_value_t value;
+
+    hsa_signal_xor_acquire(signal, 0x0001);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0001 == value);
+
+    hsa_signal_xor_acq_rel(signal, 0x0010);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0011 == value);
+
+    hsa_signal_xor_release(signal, 0x0100);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x0111 == value);
+
+    hsa_signal_xor_relaxed(signal, 0x1000);
+    value = hsa_signal_load_relaxed(signal);
+    ASSERT(0x1111 == value);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
+
+// Test entry point: initializes the runtime, runs every per-API check above
+// in sequence on the calling thread, and shuts the runtime down. Returns 0.
+int test_signal_update_basic() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_store_update_basic();
+    hsa_signal_exchange_update_basic();
+    hsa_signal_cas_update_basic();
+    hsa_signal_add_update_basic();
+    hsa_signal_subtract_update_basic();
+    hsa_signal_and_update_basic();
+    hsa_signal_or_update_basic();
+    hsa_signal_xor_update_basic();
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/signals/test_signal_value_width.c b/src/core/signals/test_signal_value_width.c
new file mode 100644
index 0000000..4bd9bbc
--- /dev/null
+++ b/src/core/signals/test_signal_value_width.c
@@ -0,0 +1,87 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_value_width + * Scope: Conformance + * + * Purpose: Verifies that hsa_signal_value_t data type has the correct + * width, given the specified machine model. + * + * Test Description: + * 1) Check the size of the hsa_signal_value_t data type. + * + * Expected Results: The width of the data type should be 32 bits + * if the machine model is small and 64 bits if the machine model + * is large. 
+ *
+ */
+
+#include
+#include
+#include
+
+// Checks that sizeof(hsa_signal_value_t) matches the machine reported by
+// uname(): 64 bits on "x86_64", 32 bits on "i686"/"i386".
+// Returns 0 after the checks ran, or -1 if uname() failed.
+int test_signal_value_width() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    struct utsname uts;
+    if (uname(&uts)) {
+        // The machine name is unavailable, so nothing can be verified.
+        // Release the runtime that was initialized above before bailing out.
+        hsa_shut_down();
+        return -1;
+    }
+
+    int size = sizeof(hsa_signal_value_t);
+
+    if (size == sizeof(int64_t)) {
+        ASSERT(strcmp(uts.machine, "x86_64") == 0);
+    } else if (size == sizeof(int32_t)) {
+        ASSERT((strcmp(uts.machine, "i686") == 0) || (strcmp(uts.machine, "i386") == 0));
+    } else {
+        // Any width other than 32 or 64 bits contradicts the expected
+        // results of this test, so report it instead of passing silently.
+        ASSERT(size == sizeof(int64_t) || size == sizeof(int32_t));
+    }
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/signals/test_signal_wait_add.c b/src/core/signals/test_signal_wait_add.c
new file mode 100644
index 0000000..dde3c59
--- /dev/null
+++ b/src/core/signals/test_signal_wait_add.c
@@ -0,0 +1,130 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** +* +* Test Name: signal_wait_add +* +* Purpose: +* Verify atomicity feature of signal operation +* +* Description: +* +* 1) Create a signal with an initial value of 0. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_acquire that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_add +* to satisfy those conditions, one at a time. +* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. +* +* 2) Create a signal with an initial value of 0. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_relaxed that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_add +* to satisfy those conditions, one at a time. 
+* For each modification of the signal value, check to see if the
+* appropriate thread, and only the appropriate thread, finished
+* waiting.
+*/
+
+#include
+#include "config.h"
+#include "test_signal_wait_utils.h"
+
+// Initial-value callback handed to signal_wait_test_v2: always returns 0.
+hsa_signal_value_t add_initial_val() {
+    return (hsa_signal_value_t) 0;
+}
+
+// Wake-up-value callback handed to signal_wait_test_v2: returns indx + 1.
+// NOTE(review): presumably the value that waiter number indx waits for;
+// confirm against signal_wait_test_v2 in the wait utilities.
+hsa_signal_value_t add_wakeup_val(int indx) {
+    return (hsa_signal_value_t) indx + 1;
+}
+
+// Set-value callback handed to signal_wait_test_v2: always returns 1; indx is
+// ignored.
+// NOTE(review): presumably the operand of each hsa_signal_add_* call, so the
+// signal advances by one per step; confirm against signal_wait_test_v2.
+hsa_signal_value_t add_set_val(int indx) {
+    return (hsa_signal_value_t) 1;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #1: runs signal_wait_test_v2 once for each
+* hsa_signal_add variant (acquire, release, relaxed, acq_rel), using
+* signal_wait_acquire_test and NUM_THREADS.
+*
+* @Return:
+* int (always 0)
+*
+*/
+
+int test_signal_wait_acquire_add() {
+    // Test the various add signal operations with wait acquire
+    signal_wait_test_v2(hsa_signal_add_acquire, signal_wait_acquire_test, add_initial_val, add_wakeup_val, add_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_add_release, signal_wait_acquire_test, add_initial_val, add_wakeup_val, add_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_add_relaxed, signal_wait_acquire_test, add_initial_val, add_wakeup_val, add_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_add_acq_rel, signal_wait_acquire_test, add_initial_val, add_wakeup_val, add_set_val, NUM_THREADS);
+    return 0;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #2: runs signal_wait_test_v2 once for each
+* hsa_signal_add variant (acquire, release, relaxed, acq_rel), using
+* signal_wait_relaxed_test and NUM_THREADS.
+*
+* @Return:
+* int (always 0)
+*
+*/
+
+int test_signal_wait_relaxed_add() {
+    // Test the various add signal operations with wait relaxed
+    signal_wait_test_v2(hsa_signal_add_acquire, signal_wait_relaxed_test, add_initial_val, add_wakeup_val, add_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_add_release, signal_wait_relaxed_test, add_initial_val, add_wakeup_val, add_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_add_relaxed, signal_wait_relaxed_test, add_initial_val, add_wakeup_val, add_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_add_acq_rel, signal_wait_relaxed_test, add_initial_val, add_wakeup_val, add_set_val, NUM_THREADS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_wait_and.c
b/src/core/signals/test_signal_wait_and.c new file mode 100644 index 0000000..2088261 --- /dev/null +++ b/src/core/signals/test_signal_wait_and.c @@ -0,0 +1,130 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** +* +* Test Name: signal_wait_and +* +* Purpose: +* Verify atomicity feature of signal operation +* +* Description: +* +* 1) Create a signal with an initial value of 0. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_acquire that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_and +* to satisfy those conditions, one at a time. +* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. +* +* 2) Create a signal with an initial value of 0. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_relaxed that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_and +* to satisfy those conditions, one at a time. +* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. 
+*/
+
+#include
+#include "config.h"
+#include "test_signal_wait_utils.h"
+
+// Initial-value callback handed to signal_wait_test_v2: returns a value with
+// every bit set ((uint64_t) -1 converted to hsa_signal_value_t).
+hsa_signal_value_t and_initial_val() {
+    return (hsa_signal_value_t) (uint64_t) -1;
+}
+
+// Wake-up-value callback handed to signal_wait_test_v2: all ones shifted left
+// by indx, i.e. the low indx bits cleared.
+// NOTE(review): for indx == 0 this equals and_initial_val(); confirm that
+// signal_wait_test_v2 tolerates a waiter whose value already matches, and
+// that indx stays below the bit width of the value.
+hsa_signal_value_t and_wakeup_val(int indx) {
+    return (hsa_signal_value_t) (((uint64_t) -1) << indx);
+}
+
+// Set-value callback handed to signal_wait_test_v2: the same mask as
+// and_wakeup_val(indx).
+// NOTE(review): presumably the operand of each hsa_signal_and_* call; confirm
+// against signal_wait_test_v2.
+hsa_signal_value_t and_set_val(int indx) {
+    return (hsa_signal_value_t) (((uint64_t) -1) << indx);
+}
+
+/**
+*
+* @Brief:
+* Implement Description #1: runs signal_wait_test_v2 once for each
+* hsa_signal_and variant (acquire, release, relaxed, acq_rel), using
+* signal_wait_acquire_test and NUM_THREADS.
+*
+* @Return:
+* int (always 0)
+*
+*/
+
+int test_signal_wait_acquire_and() {
+    // Test the various and signal operations with wait acquire
+    signal_wait_test_v2(hsa_signal_and_acquire, signal_wait_acquire_test, and_initial_val, and_wakeup_val, and_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_and_release, signal_wait_acquire_test, and_initial_val, and_wakeup_val, and_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_and_relaxed, signal_wait_acquire_test, and_initial_val, and_wakeup_val, and_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_and_acq_rel, signal_wait_acquire_test, and_initial_val, and_wakeup_val, and_set_val, NUM_THREADS);
+    return 0;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #2: runs signal_wait_test_v2 once for each
+* hsa_signal_and variant (acquire, release, relaxed, acq_rel), using
+* signal_wait_relaxed_test and NUM_THREADS.
+*
+* @Return:
+* int (always 0)
+*
+*/
+
+int test_signal_wait_relaxed_and() {
+    // Test the various and signal operations with wait relaxed
+    signal_wait_test_v2(hsa_signal_and_acquire, signal_wait_relaxed_test, and_initial_val, and_wakeup_val, and_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_and_release, signal_wait_relaxed_test, and_initial_val, and_wakeup_val, and_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_and_relaxed, signal_wait_relaxed_test, and_initial_val, and_wakeup_val, and_set_val, NUM_THREADS);
+    signal_wait_test_v2(hsa_signal_and_acq_rel, signal_wait_relaxed_test, and_initial_val, and_wakeup_val, and_set_val, NUM_THREADS);
+    return 0;
+}
diff --git a/src/core/signals/test_signal_wait_cas.c b/src/core/signals/test_signal_wait_cas.c
new file mode 100644
index 0000000..7e2930b
--- /dev/null
+++
b/src/core/signals/test_signal_wait_cas.c @@ -0,0 +1,134 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** +* +* Test Name: signal_wait_cas +* +* Purpose: +* Verify atomicity feature of signal operation +* +* Description: +* +* 1) Create a signal with an initial value of 0. +* Create NUM_THREADS threads +* Each thread should call hsa_signal_wait_acquire that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_cas +* to satisfy those conditions, one at a time. +* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. +* +* 2) Create a signal with an initial value of 0. +* Create NUM_THREADS threads +* Each thread should call hsa_signal_wait_relaxed that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_cas +* to satisfy those conditions, one at a time. +* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. 
+*/
+
+#include
+#include "config.h"
+#include "test_signal_wait_utils.h"
+
+// Initial-value callback handed to signal_wait_test_v1: always returns 0.
+hsa_signal_value_t cas_initial_val() {
+    return (hsa_signal_value_t) 0;
+}
+
+// Wake-up-value callback handed to signal_wait_test_v1: returns indx + 1.
+// NOTE(review): presumably the value that waiter number indx waits for;
+// confirm against signal_wait_test_v1 in the wait utilities.
+hsa_signal_value_t cas_wakeup_val(int indx) {
+    return (hsa_signal_value_t) indx + 1;
+}
+
+// Expected-value callback handed to signal_wait_test_v1: returns indx.
+// NOTE(review): presumably the "expected" operand of step indx's
+// compare-and-swap; confirm against signal_wait_test_v1.
+hsa_signal_value_t cas_expect_val(int indx) {
+    return (hsa_signal_value_t) indx;
+}
+
+// Set-value callback handed to signal_wait_test_v1: returns indx + 1, the
+// same value as cas_wakeup_val(indx).
+hsa_signal_value_t cas_set_val(int indx) {
+    return (hsa_signal_value_t) indx + 1;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #1: runs signal_wait_test_v1 once for each
+* hsa_signal_cas variant (acquire, release, relaxed, acq_rel), using
+* signal_wait_acquire_test and NUM_THREADS.
+*
+* @Return:
+* int (always 0)
+*
+*/
+
+int test_signal_wait_acquire_cas() {
+    // Test the various cas signal operations with wait acquire
+    signal_wait_test_v1(hsa_signal_cas_acquire, signal_wait_acquire_test, cas_initial_val, cas_wakeup_val, cas_expect_val, cas_set_val, NUM_THREADS);
+    signal_wait_test_v1(hsa_signal_cas_release, signal_wait_acquire_test, cas_initial_val, cas_wakeup_val, cas_expect_val, cas_set_val, NUM_THREADS);
+    signal_wait_test_v1(hsa_signal_cas_relaxed, signal_wait_acquire_test, cas_initial_val, cas_wakeup_val, cas_expect_val, cas_set_val, NUM_THREADS);
+    signal_wait_test_v1(hsa_signal_cas_acq_rel, signal_wait_acquire_test, cas_initial_val, cas_wakeup_val, cas_expect_val, cas_set_val, NUM_THREADS);
+    return 0;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #2: runs signal_wait_test_v1 once for each
+* hsa_signal_cas variant (acquire, release, relaxed, acq_rel), using
+* signal_wait_relaxed_test and NUM_THREADS.
+*
+* @Return:
+* int (always 0)
+*
+*/
+
+int test_signal_wait_relaxed_cas() {
+    // Test the various cas signal operations with wait relaxed
+    signal_wait_test_v1(hsa_signal_cas_acquire, signal_wait_relaxed_test, cas_initial_val, cas_wakeup_val, cas_expect_val, cas_set_val, NUM_THREADS);
+    signal_wait_test_v1(hsa_signal_cas_release, signal_wait_relaxed_test, cas_initial_val, cas_wakeup_val, cas_expect_val, cas_set_val, NUM_THREADS);
+    signal_wait_test_v1(hsa_signal_cas_relaxed, signal_wait_relaxed_test, cas_initial_val, cas_wakeup_val, cas_expect_val, cas_set_val, NUM_THREADS);
+    signal_wait_test_v1(hsa_signal_cas_acq_rel, signal_wait_relaxed_test, cas_initial_val, cas_wakeup_val, cas_expect_val, cas_set_val, NUM_THREADS);
+    return 0;
+}
diff
--git a/src/core/signals/test_signal_wait_conditions.c b/src/core/signals/test_signal_wait_conditions.c new file mode 100644 index 0000000..83c5b5d --- /dev/null +++ b/src/core/signals/test_signal_wait_conditions.c @@ -0,0 +1,186 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: signal_wait_conditions
+ * Scope: Conformance
+ *
+ * Purpose: Verifies that the hsa_signal_wait APIs properly use all of the
+ * hsa_signal_condition_t specifiers.
+ *
+ * Test Description:
+ *   1) Create a signal.
+ *   2) For each of the hsa_signal_condition_t specifiers
+ *      a) Set the signal's value and a compare value such that the
+ *         condition is not satisfied.
+ *      b) Wait on the signal using one of the hsa_signal_wait APIs
+ *      c) In another thread, modify the signal value to satisfy the
+ *         condition.
+ *   3) Repeat this for all hsa_signal_wait APIs
+ *
+ * Expected Results: The waiting thread should return from the wait API
+ * when the signal value satisfies the condition.
+ *
+ */
+
+#include
+#include
+#include
+
+// Control block handed to one waiter thread.
+typedef struct wait_cb_s {
+    // Signal the waiter blocks on.
+    hsa_signal_t signal;
+    // Signal the waiter writes to once its wait has returned.
+    hsa_signal_t response_signal;
+    // Condition and compare value passed to the wait call.
+    hsa_signal_condition_t wait_condition;
+    hsa_signal_value_t wait_value;
+    // Value stored into response_signal after the wait.
+    hsa_signal_value_t response_value;
+} wait_cb_t;
+
+// Waiter using the relaxed APIs: blocks in hsa_signal_wait_relaxed on the
+// control block's signal, then stores response_value into response_signal
+// with hsa_signal_store_relaxed. data must point to a wait_cb_t.
+// NOTE(review): the value returned by the wait is ignored; confirm whether
+// an early return from the wait needs to be handled here.
+void signal_wait_relaxed_condition(void* data) {
+    wait_cb_t* wait_cb = (wait_cb_t*) data;
+    hsa_signal_wait_relaxed(wait_cb->signal, wait_cb->wait_condition, wait_cb->wait_value,
+                            UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+    hsa_signal_store_relaxed(wait_cb->response_signal, wait_cb->response_value);
+
+    return;
+}
+
+// Waiter using the acquire/release APIs: blocks in hsa_signal_wait_acquire on
+// the control block's signal, then stores response_value into response_signal
+// with hsa_signal_store_release. data must point to a wait_cb_t.
+// NOTE(review): the value returned by the wait is ignored; confirm whether
+// an early return from the wait needs to be handled here.
+void signal_wait_acquire_condition(void* data) {
+    wait_cb_t* wait_cb = (wait_cb_t*) data;
+    hsa_signal_wait_acquire(wait_cb->signal, wait_cb->wait_condition,wait_cb->wait_value,
+                            UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+    hsa_signal_store_release(wait_cb->response_signal, wait_cb->response_value);
+
+    return;
+}
+
+// Runs one wait-condition scenario with two waiter threads.
+//
+// wait_condition    - condition both waiters pass to their wait call
+// first_wait_value  - compare value of the acquire waiter
+// first_set_value   - value the main thread stores first; also the value the
+//                     acquire waiter writes to the response signal
+// second_wait_value - compare value of the relaxed waiter
+// second_set_value  - value the main thread stores second; also the value the
+//                     relaxed waiter writes to the response signal
+//
+// The main thread stores each set value into the shared signal and waits for
+// the matching response before moving on. Both signals are destroyed before
+// the runtime is shut down.
+void do_signal_wait_conditions(hsa_signal_condition_t wait_condition,
+                               hsa_signal_value_t first_wait_value,
+                               hsa_signal_value_t first_set_value,
+                               hsa_signal_value_t second_wait_value,
+                               hsa_signal_value_t second_set_value) {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal;
+    status = hsa_signal_create(0, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t response_signal;
+    status = hsa_signal_create(0, 0, NULL, &response_signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Initialize the control blocks
+    int ii;
+    wait_cb_t wait_cb[2];
+    for (ii = 0; ii < 2; ++ii) {
+        wait_cb[ii].signal = signal;
+        wait_cb[ii].response_signal = response_signal;
+        wait_cb[ii].wait_condition = wait_condition;
+    }
+
+    wait_cb[0].wait_value = first_wait_value;
+    wait_cb[0].response_value = first_set_value;
+    wait_cb[1].wait_value = second_wait_value;
+    wait_cb[1].response_value = second_set_value;
+
+    // Initialize the thread group
+    struct test_group *test_group = test_group_create(2);
+
+    // Add the specific scenarios
+    test_group_add(test_group, signal_wait_acquire_condition, &wait_cb[0], 1);
+    test_group_add(test_group, signal_wait_relaxed_condition, &wait_cb[1], 1);
+
+    test_group_thread_create(test_group);
+    test_group_start(test_group);
+
+    // Set the first value
+    hsa_signal_store_relaxed(signal, first_set_value);
+    // Wait for the response
+    hsa_signal_wait_relaxed(response_signal, HSA_SIGNAL_CONDITION_EQ, first_set_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+
+    // Set the second value
+    hsa_signal_store_relaxed(signal, second_set_value);
+    // Wait for the response
+    hsa_signal_wait_relaxed(response_signal, HSA_SIGNAL_CONDITION_EQ, second_set_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+
+    test_group_wait(test_group);
+    test_group_exit(test_group);
+    test_group_destroy(test_group);
+
+    // The waiter threads have finished, so nothing references the signals
+    // any more; release them before shutting the runtime down.
+    status = hsa_signal_destroy(response_signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return;
+}
+
+// Runs the scenario with HSA_SIGNAL_CONDITION_EQ: the first waiter waits for
+// 1 and the second for 2, and the main thread stores 1 and then 2.
+// Returns 0.
+int test_signal_wait_conditions_eq() {
+    do_signal_wait_conditions(HSA_SIGNAL_CONDITION_EQ, 1, 1, 2, 2);
+    return 0;
+}
+
+int
test_signal_wait_conditions_ne() { + do_signal_wait_conditions(HSA_SIGNAL_CONDITION_NE, 0, 1, 1, 2); + return 0; +} + +int test_signal_wait_conditions_lt() { + do_signal_wait_conditions(HSA_SIGNAL_CONDITION_LT, 0, -1, -1, -2); + return 0; +} + +int test_signal_wait_conditions_gte() { + do_signal_wait_conditions(HSA_SIGNAL_CONDITION_GTE, 1, 1, 2, 2); + do_signal_wait_conditions(HSA_SIGNAL_CONDITION_GTE, 1, 2, 3, 4); + return 0; +} + +int test_signal_wait_conditions() { + test_signal_wait_conditions_eq(); + test_signal_wait_conditions_ne(); + test_signal_wait_conditions_lt(); + test_signal_wait_conditions_gte(); + return 0; +} diff --git a/src/core/signals/test_signal_wait_exchange.c b/src/core/signals/test_signal_wait_exchange.c new file mode 100644 index 0000000..21691a2 --- /dev/null +++ b/src/core/signals/test_signal_wait_exchange.c @@ -0,0 +1,134 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** +* +* Test Name: signal_wait_exchange +* +* Purpose: +* Verify atomicity feature of signal operation +* +* Description: +* +* 1) Create a signal with an initial value of 0. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_acquire that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_exchange +* to satisfy those conditions, one at a time. +* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. +* +* 2) Create a signal with an initial value of 0. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_relaxed that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_exchange +* to satisfy those conditions, one at a time. 
+* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. +*/ + +#include +#include "config.h" +#include "test_signal_wait_utils.h" + +hsa_signal_value_t exchange_initial_val() { + return (hsa_signal_value_t) -1; +} + +hsa_signal_value_t exchange_wakeup_val(int indx) { + return (hsa_signal_value_t) indx; +} + +hsa_signal_value_t exchange_expect_val(int indx) { + return (hsa_signal_value_t) indx - 1; +} + +hsa_signal_value_t exchange_set_val(int indx) { + return (hsa_signal_value_t) indx; +} + +/** +* +* @Brief: +* Implement Description #1 +* +* @Return: +* int +* +*/ + +int test_signal_wait_acquire_exchange() { + // Test the various exchange signal operations with wait acquire + signal_wait_test_v3(hsa_signal_exchange_acquire, signal_wait_acquire_test, exchange_initial_val, exchange_wakeup_val, exchange_expect_val, exchange_set_val, NUM_THREADS); + signal_wait_test_v3(hsa_signal_exchange_release, signal_wait_acquire_test, exchange_initial_val, exchange_wakeup_val, exchange_expect_val, exchange_set_val, NUM_THREADS); + signal_wait_test_v3(hsa_signal_exchange_relaxed, signal_wait_acquire_test, exchange_initial_val, exchange_wakeup_val, exchange_expect_val, exchange_set_val, NUM_THREADS); + signal_wait_test_v3(hsa_signal_exchange_acq_rel, signal_wait_acquire_test, exchange_initial_val, exchange_wakeup_val, exchange_expect_val, exchange_set_val, NUM_THREADS); + return 0; +} + +/** +* +* @Brief: +* Implement Description #2 +* +* @Return: +* int +* +*/ + +int test_signal_wait_relaxed_exchange() { + // Test the various exchange signal operations with wait relaxed + signal_wait_test_v3(hsa_signal_exchange_acquire, signal_wait_relaxed_test, exchange_initial_val, exchange_wakeup_val, exchange_expect_val, exchange_set_val, NUM_THREADS); + signal_wait_test_v3(hsa_signal_exchange_release, signal_wait_relaxed_test, exchange_initial_val, exchange_wakeup_val, exchange_expect_val, exchange_set_val, 
NUM_THREADS); + signal_wait_test_v3(hsa_signal_exchange_relaxed, signal_wait_relaxed_test, exchange_initial_val, exchange_wakeup_val, exchange_expect_val, exchange_set_val, NUM_THREADS); + signal_wait_test_v3(hsa_signal_exchange_acq_rel, signal_wait_relaxed_test, exchange_initial_val, exchange_wakeup_val, exchange_expect_val, exchange_set_val, NUM_THREADS); + return 0; +} diff --git a/src/core/signals/test_signal_wait_expectancy.c b/src/core/signals/test_signal_wait_expectancy.c new file mode 100644 index 0000000..3fec898 --- /dev/null +++ b/src/core/signals/test_signal_wait_expectancy.c @@ -0,0 +1,143 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_wait_expectancy + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_wait APIs will perform + * properly when passed all values of the hsa_wait_expectancy_t + * parameter. + * + * Test Description: + * 1) Create a signal. + * 2) Specify a value, condition operator and compare value that + * create a unsatisfied condition. + * 3) Wait on the signal, using one of the hsa_wait_expectancy_t + * values. + * 4) In another thread, set the signal value so the condition is + * satisfied. + * 5) Repeat this for all possible hsa_wait_expectancy_t values. + * + * Expected Results: The wait API should return once the signal + * condition is satisfied. 
+ */ + +#include +#include +#include + +typedef struct wait_cb_s { + hsa_signal_t signal; + hsa_signal_t response_signal; + hsa_wait_state_t wait_expectancy; + hsa_signal_value_t wait_value; + hsa_signal_value_t response_value; +} wait_cb_t; + +void signal_wait_expectancy(void* data) { + wait_cb_t* wait_cb = (wait_cb_t*) data; + hsa_signal_wait_relaxed(wait_cb->signal, HSA_SIGNAL_CONDITION_EQ, wait_cb->wait_value, + UINT64_MAX, wait_cb->wait_expectancy); + hsa_signal_store_release(wait_cb->response_signal, wait_cb->response_value); + + return; +} + + +int test_signal_wait_expectancy() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signals[2]; + hsa_signal_t response_signals[2]; + wait_cb_t wait_cbs[2]; + hsa_signal_value_t wait_value = 1; + int ii; + for (ii = 0; ii < 2; ++ii) { + // Initialize the signals with 0 + status = hsa_signal_create(0, 0, NULL, signals + ii); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_signal_create(0, 0, NULL, response_signals + ii); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the callback data structures + wait_cbs[ii].signal = signals[ii]; + wait_cbs[ii].response_signal = response_signals[ii]; + wait_cbs[ii].wait_value = wait_value; + wait_cbs[ii].response_value = wait_value; + } + + wait_cbs[0].wait_expectancy = HSA_WAIT_STATE_BLOCKED; + wait_cbs[1].wait_expectancy = HSA_WAIT_STATE_ACTIVE; + + // Initialize the thread group + struct test_group *test_group = test_group_create(2); + + // Add the specific scenarios + test_group_add(test_group, signal_wait_expectancy, &wait_cbs[0], 1); + test_group_add(test_group, signal_wait_expectancy, &wait_cbs[1], 1); + + test_group_thread_create(test_group); + test_group_start(test_group); + + // Set the signal values + hsa_signal_store_relaxed(signals[0], wait_value); + hsa_signal_store_relaxed(signals[1], wait_value); + + // Wait for the response + hsa_signal_wait_relaxed(response_signals[0], HSA_SIGNAL_CONDITION_EQ, 
wait_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + hsa_signal_wait_relaxed(response_signals[1], HSA_SIGNAL_CONDITION_EQ, wait_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + test_group_wait(test_group); + test_group_exit(test_group); + test_group_destroy(test_group); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_wait_multiple_agents.c b/src/core/signals/test_signal_wait_multiple_agents.c new file mode 100644 index 0000000..61ee528 --- /dev/null +++ b/src/core/signals/test_signal_wait_multiple_agents.c @@ -0,0 +1,107 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_wait_multiple_agents + * Scope: Conformance + * + * Purpose: Verifies that multiple agents can wait on a signal signal. + * + * Test Description: + * 1) Generate a list of agents on the system. If there + * are less than two, pass the test and finish executing. + * + * 2) Create a signal. + * + * 3) On each agent, wait on the signal value. This can be done either + * with an HSA runtime API or a kernel, whichever is appropriate. + * + * 4) Set the signals value such that the conditions for all waiters + * become satisfied. + * + * Expected Results: All the agents should be able to wait on the signal, + * and all of the waiters should wake up once the condition on the signal + * is met. + */ + +#include +#include +#include + +int test_signal_wait_multiple_agents() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + struct agent_list_s agent_list; + + // Generate a list of agents + get_agent_list(&agent_list); + + // If there are less than two, pass the test and finish executing. 
+ if (agent_list.num_agents < 2) { + return 0; + } + + // Create a signal + hsa_signal_t signal; + status = hsa_signal_create(0x0000, agent_list.num_agents, *(agent_list.agents), &signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_value_t value; + + hsa_signal_store_relaxed(signal, 0x1000); + + hsa_signal_wait_relaxed(signal, HSA_EQ, 0x1000, UINT64_MAX, HSA_WAIT_EXPECTANCY_UNKNOWN); + + status = hsa_signal_destroy(signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + free_agent_list(&agent_list); + + return 0; +} diff --git a/src/core/signals/test_signal_wait_or.c b/src/core/signals/test_signal_wait_or.c new file mode 100644 index 0000000..4315ee4 --- /dev/null +++ b/src/core/signals/test_signal_wait_or.c @@ -0,0 +1,135 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** +* +* Test Name: signal_wait_or +* +* Purpose: +* Verify atomicity feature of signal operation +* +* Description: +* +* 1) Create a signal with an initial value of 0. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_acquire that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_or +* to satisfy those conditions, one at a time. +* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. +* +* 2) Create a signal with an initial value of 0. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_relaxed that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_or +* to satisfy those conditions, one at a time. 
+* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. +*/ + +#include +#include "config.h" +#include "test_signal_wait_utils.h" + +hsa_signal_value_t or_initial_val() { + return (hsa_signal_value_t) (uint64_t) 0; +} + +hsa_signal_value_t or_wakeup_val(int indx) { + hsa_signal_value_t value = 0; + int ii; + for (ii = 0; ii <= indx; ++ii) { + value |= (1 << ii); + } + return value; +} + +hsa_signal_value_t or_set_val(int indx) { + return (hsa_signal_value_t) (1 << indx); +} + +/** +* +* @Brief: +* Implement Description #1 +* +* @Return: +* int +* +*/ + +int test_signal_wait_acquire_or() { + // Test the various or signal operations with wait acquire + signal_wait_test_v2(hsa_signal_or_acquire, signal_wait_acquire_test, or_initial_val, or_wakeup_val, or_set_val, NUM_THREADS); + signal_wait_test_v2(hsa_signal_or_release, signal_wait_acquire_test, or_initial_val, or_wakeup_val, or_set_val, NUM_THREADS); + signal_wait_test_v2(hsa_signal_or_relaxed, signal_wait_acquire_test, or_initial_val, or_wakeup_val, or_set_val, NUM_THREADS); + signal_wait_test_v2(hsa_signal_or_acq_rel, signal_wait_acquire_test, or_initial_val, or_wakeup_val, or_set_val, NUM_THREADS); + return 0; +} + +/** +* +* @Brief: +* Implement Description #2 +* +* @Return: +* int +* +*/ + +int test_signal_wait_relaxed_or() { + // Test the various or signal operations with wait relaxed + signal_wait_test_v2(hsa_signal_or_acquire, signal_wait_relaxed_test, or_initial_val, or_wakeup_val, or_set_val, NUM_THREADS); + signal_wait_test_v2(hsa_signal_or_release, signal_wait_relaxed_test, or_initial_val, or_wakeup_val, or_set_val, NUM_THREADS); + signal_wait_test_v2(hsa_signal_or_relaxed, signal_wait_relaxed_test, or_initial_val, or_wakeup_val, or_set_val, NUM_THREADS); + signal_wait_test_v2(hsa_signal_or_acq_rel, signal_wait_relaxed_test, or_initial_val, or_wakeup_val, or_set_val, NUM_THREADS); + return 0; +} diff --git 
a/src/core/signals/test_signal_wait_satisfied_conditions.c b/src/core/signals/test_signal_wait_satisfied_conditions.c new file mode 100644 index 0000000..b5db064 --- /dev/null +++ b/src/core/signals/test_signal_wait_satisfied_conditions.c @@ -0,0 +1,124 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_wait_satisfied_conditions + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_wait APIs properly use all of the + * hsa_signal_condition_t specifiers, specifically when the signal value + * already satisfies the condition. + * + * Test Description: + * 1) Create a signal. + * 2) For each of the hsa_signal_condition_t specifiers + * a) Set the signal's value and a compare value such that the + * condition is already satisfied. + * b) Wait on the signal using one of the hsa_signal_wait APIs + * 3) Repeat this for all hsa_signal_wait APIs + * +Expected Results: The waiting thread should return immediately + */ + +#include +#include + +void do_signal_wait_satisfied_conditions(hsa_signal_condition_t wait_condition, + hsa_signal_value_t set_value, + hsa_signal_value_t wait_value) { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the signals with 0 + hsa_signal_t signals[2]; + status = hsa_signal_create(0, 0, NULL, signals); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_signal_create(0, 0, NULL, signals + 1); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Set the values + hsa_signal_store_relaxed(signals[0], set_value); + hsa_signal_store_relaxed(signals[1], set_value); + + // Wait for the response + hsa_signal_wait_relaxed(signals[0], wait_condition, wait_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + hsa_signal_wait_acquire(signals[1], wait_condition, wait_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return; +} + +void signal_wait_satisfied_conditions_eq() { + 
do_signal_wait_satisfied_conditions(HSA_SIGNAL_CONDITION_EQ, 1, 1); + return; +} + +void signal_wait_satisfied_conditions_ne() { + do_signal_wait_satisfied_conditions(HSA_SIGNAL_CONDITION_NE, 1, 2); + return; +} + +void signal_wait_satisfied_conditions_lt() { + do_signal_wait_satisfied_conditions(HSA_SIGNAL_CONDITION_LT, 1, 2); + return; +} + +void signal_wait_satisfied_conditions_gte() { + do_signal_wait_satisfied_conditions(HSA_SIGNAL_CONDITION_GTE, 2, 2); + do_signal_wait_satisfied_conditions(HSA_SIGNAL_CONDITION_GTE, 2, 1); + return; +} + +int test_signal_wait_satisfied_conditions() { + // Do NOT set the "set_value" or "wait_value" to 0. + // Signal values are initialized to 0. + signal_wait_satisfied_conditions_eq(); + signal_wait_satisfied_conditions_ne(); + signal_wait_satisfied_conditions_lt(); + signal_wait_satisfied_conditions_gte(); + return 0; +} diff --git a/src/core/signals/test_signal_wait_store.c b/src/core/signals/test_signal_wait_store.c new file mode 100644 index 0000000..8a09aab --- /dev/null +++ b/src/core/signals/test_signal_wait_store.c @@ -0,0 +1,178 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_wait_store + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_wait API will respond appropriately + * to signal value changes made by the hsa_signal_store API. + * + * Test Description: + * 1) Create a signal with an initial value of 0. + * 2) Create several threads + * a) Each thread should specify await condition that requires + * a signal value that won't awake any of the other threads. 
+ * b) Each thread should wait on the signal with that condition.
+ * 3) In the main thread, use the various flavors of hsa_signal_store
+ * to satisfy those conditions, one at a time.
+ * 4) For each modification of the signal value, check to see if the
+ * appropriate thread, and only the appropriate thread, finished
+ * waiting.
+ * 5) Repeat for all versions of the hsa_signal_wait, using all of the
+ * memory ordering variants.
+ *
+ * Expected Results: Only the thread that satisfies a specific condition
+ * should quit waiting.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "config.h"
+
+// Number of waiter threads that have returned from their hsa_signal_wait_*
+// call during the current run. Guarded by test_mutex.
+int num_waked_thread = 0;
+
+// Protects num_waked_thread.
+pthread_mutex_t test_mutex;
+
+// Per-thread argument: the signal to wait on and the value that releases
+// this particular waiter.
+typedef struct {
+    int signal_wait_val;
+    hsa_signal_t signal;
+} param;
+
+// Thread body: blocks in hsa_signal_wait_acquire until the signal equals the
+// thread's own wake-up value, then records the wake-up.
+void test_signal_wait_acquire(void *data) {
+    param* param_ptr = (param*) data;
+    hsa_signal_wait_acquire(param_ptr->signal, HSA_SIGNAL_CONDITION_EQ,
+                            param_ptr->signal_wait_val, UINT64_MAX,
+                            HSA_WAIT_STATE_BLOCKED);
+
+    // increment the number of waked-up threads
+    pthread_mutex_lock(&test_mutex);
+    num_waked_thread += 1;
+    pthread_mutex_unlock(&test_mutex);
+}
+
+// Thread body: same as test_signal_wait_acquire, but waits with
+// hsa_signal_wait_relaxed.
+void test_signal_wait_relaxed(void *data) {
+    param* param_ptr = (param*) data;
+    hsa_signal_wait_relaxed(param_ptr->signal, HSA_SIGNAL_CONDITION_EQ,
+                            param_ptr->signal_wait_val, UINT64_MAX,
+                            HSA_WAIT_STATE_BLOCKED);
+
+    // increment the number of waked-up threads
+    pthread_mutex_lock(&test_mutex);
+    num_waked_thread += 1;
+    pthread_mutex_unlock(&test_mutex);
+}
+
+// Runs the wait/store scenario once.
+//
+// NUM_THREADS waiters are started, waiter ii waiting for the signal to equal
+// ii + 1 (odd ii use the acquire wait, even ii the relaxed wait). The main
+// thread then stores 1, 2, ... and after each store checks that exactly one
+// more waiter has finished.
+//
+// use_release: non-zero stores with hsa_signal_store_release, zero stores
+//              with hsa_signal_store_relaxed.
+// Returns 0 when the scenario runs to completion.
+int test_signal_wait_store(int use_release) {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    hsa_signal_t signal;
+    status = hsa_signal_create(0, 0, NULL, &signal);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // The counter is a file-scope global shared by the relaxed and release
+    // entry points, so it has to start from zero on every run; otherwise a
+    // second run in the same process fails its very first check.
+    pthread_mutex_init(&test_mutex, NULL);
+    num_waked_thread = 0;
+
+    // create threads, one half with hsa_signal_wait_relaxed, one half with
+    // hsa_signal_wait_acquire
+    struct test_group *tg_signal_wt = test_group_create(NUM_THREADS);
+
+    // Declare the parameter array
+    param params[NUM_THREADS];
+
+    int ii;
+    for (ii = 0; ii < NUM_THREADS; ++ii) {
+        params[ii].signal = signal;
+        params[ii].signal_wait_val = ii + 1;
+        test_group_add(tg_signal_wt, ii % 2 ? &test_signal_wait_acquire : &test_signal_wait_relaxed, &params[ii], 1);
+    }
+
+    test_group_thread_create(tg_signal_wt);
+
+    test_group_start(tg_signal_wt);
+
+    // increment signal value to wake up corresponding threads
+    for (ii = 0; ii < NUM_THREADS; ++ii) {
+        if (use_release) {
+            hsa_signal_store_release(signal, ii + 1);
+        } else {
+            hsa_signal_store_relaxed(signal, ii + 1);
+        }
+
+        // wait until threads ii wake up
+        while (test_group_test_status(tg_signal_wt, ii) != TEST_STOP);
+
+        // Read the counter under the same lock the waiters use to update it.
+        pthread_mutex_lock(&test_mutex);
+        int waked_so_far = num_waked_thread;
+        pthread_mutex_unlock(&test_mutex);
+
+        // check if the number of waked-up thread equals to number of signal
+        // value set-up.
+        ASSERT_MSG(waked_so_far == ii + 1, "more than one thread has been waked up\n");
+    }
+
+    test_group_exit(tg_signal_wt);
+    test_group_destroy(tg_signal_wt);
+
+    pthread_mutex_destroy(&test_mutex);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    status = hsa_shut_down();
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    return 0;
+}
+
+// Wake the waiters with hsa_signal_store_relaxed.
+int test_signal_wait_store_relaxed() {
+    return test_signal_wait_store(0);
+}
+
+// Wake the waiters with hsa_signal_store_release.
+int test_signal_wait_store_release() {
+    return test_signal_wait_store(1);
+}
diff --git a/src/core/signals/test_signal_wait_subtract.c b/src/core/signals/test_signal_wait_subtract.c
new file mode 100644
index 0000000..428d745
--- /dev/null
+++ b/src/core/signals/test_signal_wait_subtract.c
@@ -0,0 +1,130 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** +* +* Test Name: signal_wait_subtract +* +* Purpose: +* Verify atomicity feature of signal operation +* +* Description: +* +* 1) Create a signal with an initial value of 512. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_acquire that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_subtract +* to satisfy those conditions, one at a time. +* For each modification of the signal value, check to see if the +* appropriate thread, and only the appropriate thread, finished +* waiting. +* +* 2) Create a signal with an initial value of 512. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_relaxed that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition. +* In the main thread, use the various flavors of hsa_signal_subtract +* to satisfy those conditions, one at a time.
+* For each modification of the signal value, check to see if the
+* appropriate thread, and only the appropriate thread, finished
+* waiting.
+*/
+
+#include
+#include "config.h"
+#include "test_signal_wait_utils.h"
+
+// Value the test signal is created with. The wake-up values are derived from
+// it, so it is spelled out once instead of being repeated as a literal.
+static const hsa_signal_value_t SUBTRACT_INITIAL_VALUE = 512;
+
+// Initial value of the signal used by the subtract tests.
+hsa_signal_value_t subtract_initial_val() {
+    return SUBTRACT_INITIAL_VALUE;
+}
+
+// Signal value that releases waiter `indx`: the initial value after
+// (indx + 1) subtractions of 1.
+hsa_signal_value_t subtract_wakeup_val(int indx) {
+    return SUBTRACT_INITIAL_VALUE - (indx + 1);
+}
+
+// Amount subtracted at step `indx`; the same for every step.
+hsa_signal_value_t subtract_set_val(int indx) {
+    (void) indx;
+    return (hsa_signal_value_t) 1;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #1: waiters use hsa_signal_wait_acquire and the main
+* thread wakes them with each memory-ordering variant of hsa_signal_subtract.
+*
+* @Return:
+* int: 0 if every variant returned 0, non-zero otherwise
+*
+*/
+
+int test_signal_wait_acquire_subtract() {
+    // All four variants always run; a non-zero result from any of them is
+    // reported to the caller instead of being dropped.
+    int failed = 0;
+    failed |= signal_wait_test_v2(hsa_signal_subtract_acquire, signal_wait_acquire_test, subtract_initial_val, subtract_wakeup_val, subtract_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_subtract_release, signal_wait_acquire_test, subtract_initial_val, subtract_wakeup_val, subtract_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_subtract_relaxed, signal_wait_acquire_test, subtract_initial_val, subtract_wakeup_val, subtract_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_subtract_acq_rel, signal_wait_acquire_test, subtract_initial_val, subtract_wakeup_val, subtract_set_val, NUM_THREADS);
+    return failed;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #2: waiters use hsa_signal_wait_relaxed and the main
+* thread wakes them with each memory-ordering variant of hsa_signal_subtract.
+*
+* @Return:
+* int: 0 if every variant returned 0, non-zero otherwise
+*
+*/
+
+int test_signal_wait_relaxed_subtract() {
+    // All four variants always run; a non-zero result from any of them is
+    // reported to the caller instead of being dropped.
+    int failed = 0;
+    failed |= signal_wait_test_v2(hsa_signal_subtract_acquire, signal_wait_relaxed_test, subtract_initial_val, subtract_wakeup_val, subtract_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_subtract_release, signal_wait_relaxed_test, subtract_initial_val, subtract_wakeup_val, subtract_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_subtract_relaxed, signal_wait_relaxed_test, subtract_initial_val, subtract_wakeup_val, subtract_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_subtract_acq_rel, signal_wait_relaxed_test, subtract_initial_val, subtract_wakeup_val, subtract_set_val, NUM_THREADS);
+    return failed;
+}
diff --git a/src/core/signals/test_signal_wait_timeout.c b/src/core/signals/test_signal_wait_timeout.c
new file mode 100644
index 0000000..250b571
--- /dev/null
+++ b/src/core/signals/test_signal_wait_timeout.c
@@ -0,0 +1,117 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_wait_timeout + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_wait APIs properly use timeout hints. + * + * Test Description: + * 1) Create a signal. + * 2) Specify a value, condition operator and compare value that + * create a unsatisfied condition. + * 3) Wait on the signal with one of the hsa_signal_wait APIs using + * a non-zero timeout hint. + * 4) Use the system TIMESTAMP attribute to calculate how long the + * API waited before returning. + * + * Expected Results: The API should wait for the specified timeout + * hint, with a reasonable variation. 
+ */
+
+#include
+#include
+
+// Waits on a signal whose condition can never be met and checks that the
+// wait returns after a non-zero amount of time.
+//
+// wait_fnc: the wait API under test (hsa_signal_wait_acquire or
+//           hsa_signal_wait_relaxed).
+// Returns 0 when the scenario runs to completion.
+int test_signal_wait_timeout(hsa_signal_value_t (*wait_fnc)(hsa_signal_t signal, hsa_signal_condition_t condition, hsa_signal_value_t compare_value, uint64_t timeout_hint, hsa_wait_state_t wait_state_hint)) {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY is a 64-bit attribute. A narrower
+    // destination would be written past its end and would truncate the
+    // frequency that is used as the timeout below.
+    uint64_t timestamp_freq;
+    status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &timestamp_freq);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    hsa_signal_t signal;
+    status = hsa_signal_create(0, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    uint64_t start_time, stop_time;
+
+    status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &start_time);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    // The signal holds 0 and is never modified, so "== 1" cannot be
+    // satisfied. A timeout of timestamp_freq ticks asks for about one second
+    // per call. Try both wait states.
+    wait_fnc(signal, HSA_SIGNAL_CONDITION_EQ, 1, timestamp_freq, HSA_WAIT_STATE_BLOCKED);
+    wait_fnc(signal, HSA_SIGNAL_CONDITION_EQ, 1, timestamp_freq, HSA_WAIT_STATE_ACTIVE);
+
+    status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &stop_time);
+    ASSERT(status == HSA_STATUS_SUCCESS);
+
+    uint64_t wait_delta = (stop_time - start_time);
+
+    // The timeout value is a hint, so the actual wait time is arbitrary, but should
+    // be greater than zero.
+    ASSERT(wait_delta > 0);
+
+    status = hsa_signal_destroy(signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+// Timeout test for hsa_signal_wait_acquire.
+int test_signal_wait_acquire_timeout() {
+    return test_signal_wait_timeout(hsa_signal_wait_acquire);
+}
+
+// Timeout test for hsa_signal_wait_relaxed.
+int test_signal_wait_relaxed_timeout() {
+    return test_signal_wait_timeout(hsa_signal_wait_relaxed);
+}
diff --git a/src/core/signals/test_signal_wait_utils.c b/src/core/signals/test_signal_wait_utils.c
new file mode 100644
index 0000000..0d6d6b1
--- /dev/null
+++ b/src/core/signals/test_signal_wait_utils.c
@@ -0,0 +1,265 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include
+#include
+#include
+
+// Argument handed to each waiter thread.
+typedef struct {
+    hsa_signal_t signal_handle;  // signal the thread waits on
+    volatile int num;            // signal value that releases this thread
+    int* flag;                   // set to 1 by the thread once it has woken up
+} param;
+
+// Thread entry point: waits with acquire ordering until the signal equals
+// the thread's wake-up value, then raises the thread's flag.
+void* signal_wait_acquire_test(void* arg) {
+    param* param_ptr = (param*)arg;
+    hsa_signal_value_t signal_value = param_ptr->num;
+    hsa_signal_t signal_handle = param_ptr->signal_handle;
+    int* flag = param_ptr->flag;
+
+    // Wait on signal with memory ordering of acquire
+    hsa_signal_wait_acquire(signal_handle, HSA_SIGNAL_CONDITION_EQ, signal_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+    // Set flag to indicate the thread has waken up
+    *flag = 1;
+
+    return NULL;
+}
+
+// Thread entry point: same as signal_wait_acquire_test, but waits with
+// relaxed ordering.
+void* signal_wait_relaxed_test(void* arg) {
+    param* param_ptr = (param*)arg;
+    hsa_signal_value_t signal_value = param_ptr->num;
+    hsa_signal_t signal_handle = param_ptr->signal_handle;
+    int* flag = param_ptr->flag;
+
+    // Wait on signal with memory ordering of relaxed
+    hsa_signal_wait_relaxed(signal_handle, HSA_SIGNAL_CONDITION_EQ, signal_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+    // Set flag to indicate the thread has waken up
+    *flag = 1;
+
+    return NULL;
+}
+
+// Clears every flag, fills in one argument slot per waiter and starts the
+// waiter threads. Returns 0 on success, or the pthread_create error code of
+// the first thread that could not be started.
+static int start_waiters(pthread_t* id, param* args, int* flag,
+                         hsa_signal_t signal_handle,
+                         void* (*wait_test)(void* arg),
+                         hsa_signal_value_t (*wakeup_val)(int index),
+                         int num_threads) {
+    int ii;
+    for (ii = 0; ii < num_threads; ++ii) {
+        flag[ii] = 0;
+        args[ii].signal_handle = signal_handle;
+        args[ii].num = wakeup_val(ii);
+        args[ii].flag = &flag[ii];
+        int err = pthread_create(&id[ii], NULL, wait_test, &args[ii]);
+        if (err != 0) {
+            return err;
+        }
+    }
+    return 0;
+}
+
+// Returns 1 when flags 0..last_woken are all 1 and every later flag is
+// still 0, i.e. exactly the expected waiters have woken up; 0 otherwise.
+static int flags_match_wake_order(const int* flag, int num_threads, int last_woken) {
+    int jj;
+    for (jj = 0; jj < num_threads; ++jj) {
+        int expected = (jj <= last_woken) ? 1 : 0;
+        if (flag[jj] != expected) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+// Wake-up test for three-argument signal operations that return the previous
+// value (the callers pass compare-and-swap style functions).
+//
+// signal_func: operation under test, called as (signal, expected, value).
+// wait_test:   waiter thread entry point.
+// initial_val: returns the value the signal is created with.
+// wakeup_val:  returns the signal value that releases waiter `index`.
+// expect_val:  returns the value signal_func must return at step `index`.
+// set_val:     returns the value passed to signal_func at step `index`.
+// num_threads: number of waiter threads.
+// Returns 0 when the scenario runs to completion.
+int signal_wait_test_v1(hsa_signal_value_t (*signal_func)(hsa_signal_t signal,
+                            hsa_signal_value_t expected, hsa_signal_value_t value),
+                        void* (*wait_test)(void* arg),
+                        hsa_signal_value_t (*initial_val)(),
+                        hsa_signal_value_t (*wakeup_val)(int index),
+                        hsa_signal_value_t (*expect_val)(int index),
+                        hsa_signal_value_t (*set_val)(int index),
+                        int num_threads) {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    hsa_signal_value_t initial_value = initial_val();
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Prepare data for threads
+    pthread_t id[num_threads];
+    param arg[num_threads];
+    int flag[num_threads];
+
+    // A thread that failed to start would leave its id uninitialised for
+    // the pthread_join below, so creation failures stop the test here.
+    ASSERT(0 == start_waiters(id, arg, flag, signal_handle, wait_test, wakeup_val, num_threads));
+
+    // Set signal value to wake up specific thread
+    int ii;
+    for (ii = 0; ii < num_threads; ++ii) {
+        hsa_signal_value_t expect = expect_val(ii);
+        hsa_signal_value_t value = signal_func(signal_handle, expect, set_val(ii));
+        ASSERT(expect == value);
+        pthread_join(id[ii], NULL);
+        // Check the flag data to make sure just one thread wakes up
+        ASSERT(flags_match_wake_order(flag, num_threads, ii));
+    }
+
+    // Check if all of the flag has been set to 1
+    ASSERT(flags_match_wake_order(flag, num_threads, num_threads - 1));
+
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+// Wake-up test for two-argument signal operations that return nothing.
+//
+// signal_func: operation under test, called as (signal, value).
+// wait_test:   waiter thread entry point.
+// initial_val: returns the value the signal is created with.
+// wakeup_val:  returns the signal value that releases waiter `index`.
+// set_val:     returns the value passed to signal_func at step `index`.
+// num_threads: number of waiter threads.
+// Returns 0 when the scenario runs to completion.
+int signal_wait_test_v2(void (*signal_func)(hsa_signal_t signal, hsa_signal_value_t value),
+                        void* (*wait_test)(void* arg),
+                        hsa_signal_value_t (*initial_val)(),
+                        hsa_signal_value_t (*wakeup_val)(int index),
+                        hsa_signal_value_t (*set_val)(int index),
+                        int num_threads) {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    hsa_signal_value_t initial_value = initial_val();
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Prepare data for threads
+    pthread_t id[num_threads];
+    param arg[num_threads];
+    int flag[num_threads];
+
+    // A thread that failed to start would leave its id uninitialised for
+    // the pthread_join below, so creation failures stop the test here.
+    ASSERT(0 == start_waiters(id, arg, flag, signal_handle, wait_test, wakeup_val, num_threads));
+
+    // Set signal value to wake up specific thread
+    int ii;
+    for (ii = 0; ii < num_threads; ++ii) {
+        hsa_signal_value_t value = set_val(ii);
+        signal_func(signal_handle, value);
+        pthread_join(id[ii], NULL);
+        // Check the flag data to make sure just one thread wakes up
+        ASSERT(flags_match_wake_order(flag, num_threads, ii));
+    }
+
+    // Check if all of the flag has been set to 1
+    ASSERT(flags_match_wake_order(flag, num_threads, num_threads - 1));
+
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
+
+// Wake-up test for two-argument signal operations that return the previous
+// value.
+//
+// signal_func: operation under test, called as (signal, value).
+// wait_test:   waiter thread entry point.
+// initial_val: returns the value the signal is created with.
+// wakeup_val:  returns the signal value that releases waiter `index`.
+// expect_val:  returns the value signal_func must return at step `index`.
+// set_val:     returns the value passed to signal_func at step `index`.
+// num_threads: number of waiter threads.
+// Returns 0 when the scenario runs to completion.
+int signal_wait_test_v3(hsa_signal_value_t (*signal_func)(hsa_signal_t signal,
+                            hsa_signal_value_t value),
+                        void* (*wait_test)(void* arg),
+                        hsa_signal_value_t (*initial_val)(),
+                        hsa_signal_value_t (*wakeup_val)(int index),
+                        hsa_signal_value_t (*expect_val)(int index),
+                        hsa_signal_value_t (*set_val)(int index),
+                        int num_threads) {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    hsa_signal_t signal_handle;
+    hsa_signal_value_t initial_value = initial_val();
+    status = hsa_signal_create(initial_value, 0, NULL, &signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Prepare data for threads
+    pthread_t id[num_threads];
+    param arg[num_threads];
+    int flag[num_threads];
+
+    // A thread that failed to start would leave its id uninitialised for
+    // the pthread_join below, so creation failures stop the test here.
+    ASSERT(0 == start_waiters(id, arg, flag, signal_handle, wait_test, wakeup_val, num_threads));
+
+    // Set signal value to wake up specific thread
+    int ii;
+    for (ii = 0; ii < num_threads; ++ii) {
+        hsa_signal_value_t value = signal_func(signal_handle, set_val(ii));
+        hsa_signal_value_t expect = expect_val(ii);
+        ASSERT(expect == value);
+        pthread_join(id[ii], NULL);
+        // Check the flag data to make sure just one thread wakes up
+        ASSERT(flags_match_wake_order(flag, num_threads, ii));
+    }
+
+    // Check if all of the flag has been set to 1
+    ASSERT(flags_match_wake_order(flag, num_threads, num_threads - 1));
+
+    status = hsa_signal_destroy(signal_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/core/signals/test_signal_wait_utils.h b/src/core/signals/test_signal_wait_utils.h
new file mode 100644
index 0000000..4d6fb99
--- /dev/null
+++ b/src/core/signals/test_signal_wait_utils.h
@@ -0,0 +1,85 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+// The guard name avoids a leading underscore followed by an upper-case
+// letter, which C reserves for the implementation.
+#ifndef TEST_SIGNAL_WAIT_UTILS_H_
+#define TEST_SIGNAL_WAIT_UTILS_H_
+
+#include
+
+// Argument handed to each waiter thread.
+typedef struct {
+    hsa_signal_t signal_handle;  // signal the thread waits on
+    volatile int num;            // signal value that releases this thread
+    int* flag;                   // set to 1 by the thread once it has woken up
+} param;
+
+// Waiter thread entry point using hsa_signal_wait_acquire. `arg` points to a
+// param; the function sets *param.flag to 1 after the wait returns.
+void* signal_wait_acquire_test(void* arg);
+
+// Waiter thread entry point using hsa_signal_wait_relaxed. `arg` points to a
+// param; the function sets *param.flag to 1 after the wait returns.
+void* signal_wait_relaxed_test(void* arg);
+
+// Wake-up test for signal operations called as (signal, expected, value)
+// that return a value, which is compared with expect_val(index) at each step.
+int signal_wait_test_v1(hsa_signal_value_t (*signal_func)(hsa_signal_t signal,
+                            hsa_signal_value_t expected, hsa_signal_value_t value),
+                        void* (*wait_test)(void* arg),
+                        hsa_signal_value_t (*initial_val)(),
+                        hsa_signal_value_t (*wakeup_val)(int index),
+                        hsa_signal_value_t (*expect_val)(int index),
+                        hsa_signal_value_t (*set_val)(int index),
+                        int num_threads);
+
+// Wake-up test for signal operations called as (signal, value) that return
+// nothing.
+int signal_wait_test_v2(void (*signal_func)(hsa_signal_t signal, hsa_signal_value_t value),
+                        void* (*wait_test)(void* arg),
+                        hsa_signal_value_t (*initial_val)(),
+                        hsa_signal_value_t (*wakeup_val)(int index),
+                        hsa_signal_value_t (*set_val)(int index),
+                        int num_threads);
+
+// Wake-up test for signal operations called as (signal, value) that return a
+// value, which is compared with expect_val(index) at each step.
+int signal_wait_test_v3(hsa_signal_value_t (*signal_func)(hsa_signal_t signal, hsa_signal_value_t value),
+                        void* (*wait_test)(void* arg),
+                        hsa_signal_value_t (*initial_val)(),
+                        hsa_signal_value_t (*wakeup_val)(int index),
+                        hsa_signal_value_t (*expect_val)(int index),
+                        hsa_signal_value_t (*set_val)(int index),
+                        int num_threads);
+
+#endif  // TEST_SIGNAL_WAIT_UTILS_H_
diff --git a/src/core/signals/test_signal_wait_xor.c b/src/core/signals/test_signal_wait_xor.c
new file mode 100644
index 0000000..a480967
--- /dev/null
+++ b/src/core/signals/test_signal_wait_xor.c
@@ -0,0 +1,135 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** +* +* Test Name: signal_wait_xor +* +* Purpose: +* Verify atomicity feature of signal operation +* +* Description: +* +* 1) Create a signal with an initial value of 0. +* Create NUM_THREAD threads +* Each thread should call hsa_signal_wait_acquire that requires +* a signal value that won't awake any of the other threads. +* Each thread should wait on the signal with that condition.
+* In the main thread, use the various flavors of hsa_signal_xor
+* to satisfy those conditions, one at a time.
+* For each modification of the signal value, check to see if the
+* appropriate thread, and only the appropriate thread, finished
+* waiting.
+*
+* 2) Create a signal with an initial value of 0.
+* Create NUM_THREAD threads
+* Each thread should call hsa_signal_wait_relaxed that requires
+* a signal value that won't awake any of the other threads.
+* Each thread should wait on the signal with that condition.
+* In the main thread, use the various flavors of hsa_signal_xor
+* to satisfy those conditions, one at a time.
+* For each modification of the signal value, check to see if the
+* appropriate thread, and only the appropriate thread, finished
+* waiting.
+*/
+
+#include
+#include "config.h"
+#include "test_signal_wait_utils.h"
+
+// Initial value of the signal used by the xor tests.
+hsa_signal_value_t xor_initial_val() {
+    return (hsa_signal_value_t) 0;
+}
+
+// Signal value that releases waiter `indx`: bits 0..indx set, which is what
+// the signal holds after steps 0..indx have each toggled their own bit.
+hsa_signal_value_t xor_wakeup_val(int indx) {
+    hsa_signal_value_t value = 0;
+    int ii;
+    for (ii = 0; ii <= indx; ++ii) {
+        // Shift in the signal's own type; a plain `1 << ii` is computed as
+        // int and is only widened afterwards.
+        value |= ((hsa_signal_value_t) 1 << ii);
+    }
+    return value;
+}
+
+// Value xor-ed into the signal at step `indx`: only bit `indx` set.
+hsa_signal_value_t xor_set_val(int indx) {
+    return (hsa_signal_value_t) 1 << indx;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #1: waiters use hsa_signal_wait_acquire and the main
+* thread wakes them with each memory-ordering variant of hsa_signal_xor.
+*
+* @Return:
+* int: 0 if every variant returned 0, non-zero otherwise
+*
+*/
+
+int test_signal_wait_acquire_xor() {
+    // All four variants always run; a non-zero result from any of them is
+    // reported to the caller instead of being dropped.
+    int failed = 0;
+    failed |= signal_wait_test_v2(hsa_signal_xor_acquire, signal_wait_acquire_test, xor_initial_val, xor_wakeup_val, xor_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_xor_release, signal_wait_acquire_test, xor_initial_val, xor_wakeup_val, xor_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_xor_relaxed, signal_wait_acquire_test, xor_initial_val, xor_wakeup_val, xor_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_xor_acq_rel, signal_wait_acquire_test, xor_initial_val, xor_wakeup_val, xor_set_val, NUM_THREADS);
+    return failed;
+}
+
+/**
+*
+* @Brief:
+* Implement Description #2: waiters use hsa_signal_wait_relaxed and the main
+* thread wakes them with each memory-ordering variant of hsa_signal_xor.
+*
+* @Return:
+* int: 0 if every variant returned 0, non-zero otherwise
+*
+*/
+
+int test_signal_wait_relaxed_xor() {
+    // All four variants always run; a non-zero result from any of them is
+    // reported to the caller instead of being dropped.
+    int failed = 0;
+    failed |= signal_wait_test_v2(hsa_signal_xor_acquire, signal_wait_relaxed_test, xor_initial_val, xor_wakeup_val, xor_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_xor_release, signal_wait_relaxed_test, xor_initial_val, xor_wakeup_val, xor_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_xor_relaxed, signal_wait_relaxed_test, xor_initial_val, xor_wakeup_val, xor_set_val, NUM_THREADS);
+    failed |= signal_wait_test_v2(hsa_signal_xor_acq_rel, signal_wait_relaxed_test, xor_initial_val, xor_wakeup_val, xor_set_val, NUM_THREADS);
+    return failed;
+}
diff --git a/src/core/signals/test_signal_xor_acq_rel_ordering.c b/src/core/signals/test_signal_xor_acq_rel_ordering.c
new file mode 100644
index 0000000..45e8dd4
--- /dev/null
+++ b/src/core/signals/test_signal_xor_acq_rel_ordering.c
@@ -0,0 +1,194 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_xor_acq_rel_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_xor_acq_rel API enforces + * correct memory ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. All the signal values should be initialized to have + * the last bit set. + * 2) Create a control signal, denoted by y, also initialized to have only + * the last bit set. 
+ * 3) Start one thread that + * a) Checks the value of y in a loop using hsa_signal_cas_acq_rel using + * 0 as the exchange value. + * b) When the value of y has only the last bit set, the thread stops looping, and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting all x values to have their last bit set. + * d) Replaces the x values by using signal_store_relaxed to set them to 0, + * and then uses signal_or_relaxed to set the first bit only. + * e) Sets the first bit of y, which the compare-and-swap in step a) already reset to 0, + * by using signal_xor_acq_rel. + * f) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of y is set, not the last. + * 5) Let both threads run for 32K iterations. + * + * Expected Results: For each cycle all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa.
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t x[NUM_X]; + +// test func one: +// check y, if the last bit of y has been set, set y to 0, and then check if last bits of all x have been set and set the first bit of all x, and then set the first bit of y +void test_signal_xor_acq_rel_t1(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // spin until the CAS on y returns LAST_BIT (expected value LAST_BIT, replacement value 0) + while (hsa_signal_cas_acq_rel(y, LAST_BIT, 0) != LAST_BIT); + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal LAST_BIT + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == LAST_BIT, "the last bit of the signal value should be set\n"); + + // change x value to 0 + hsa_signal_store_relaxed(x[ii], 0); + + // set first bit of x's + hsa_signal_or_relaxed(x[ii], FIRST_BIT); + } + + // y was reset to 0 by the CAS above, so this xor sets only the first bit of y + hsa_signal_xor_acq_rel(y, FIRST_BIT); + } + return; +} + + +// test func two: +// check y, if the first bit of y has been set, set y to 0, and then check if first bits of all x have been set and set the last bit of all x, and then set the last bit of y +void test_signal_xor_acq_rel_t2(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // spin until the CAS on y returns FIRST_BIT (expected value FIRST_BIT, replacement value 0) + while (hsa_signal_cas_acq_rel(y, FIRST_BIT, 0) != FIRST_BIT); + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal FIRST_BIT (0x80000000 on 32bit machine or 0x8000000000000000 on 64bit machine) + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == FIRST_BIT, "the first bit of the signal value should be set\n"); + + // change x value to 0 + hsa_signal_store_relaxed(x[ii], 0); + + // set last bit of x's + hsa_signal_or_relaxed(x[ii], LAST_BIT); + } + + // y was reset to 0 by the CAS above, so this xor sets only the last bit of y + hsa_signal_xor_acq_rel(y, LAST_BIT); + } + return; +} + +// test driver: creates the NUM_X x signals and the control signal y (all with initial value LAST_BIT), runs t1 and t2 in a two-entry test group, then destroys the signals and shuts the runtime down; returns 0 +int test_signal_xor_acq_rel_ordering() { + 
hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // create every x with initial value LAST_BIT + status = hsa_signal_create(LAST_BIT, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // create y with initial value LAST_BIT (the value t1 spins on) + status = hsa_signal_create(LAST_BIT, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(2); + + // add test func one to the test group + test_group_add(test, test_signal_xor_acq_rel_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_xor_acq_rel_t2, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait for all test functions to finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + // destroy the signals created above + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_xor_acq_rel_ordering_transitive.c b/src/core/signals/test_signal_xor_acq_rel_ordering_transitive.c new file mode 100644 index 0000000..cde67ad --- /dev/null +++ b/src/core/signals/test_signal_xor_acq_rel_ordering_transitive.c @@ -0,0 +1,232 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_xor_acq_rel_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_xor_acq_rel API + * API enforces transitive memory ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. 
All the signal values should be initialized + * to have their last bit set. + * 2) Create two control signals, denoted by y and z. The initial + * y signal value should have its last bit set and the z signal + * value should also have its last bit set. + * 3) Start one thread that + * a) Uses signal_load_relaxed to load the value of y in a loop, stopping + * when the value has the last bit set. + * b) Uses signal_xor_acq_rel to clear all of the y bits. + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting each value to have the last bit set. + * d) Replaces the x values by using signal_xor_relaxed to set the first + * bit. + * e) Replaces the value of y by using signal_xor_acq_rel to set the first bit. + * f) Starts over. + * g) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of z is set, not the last. It also operates on signal z, + * not y. + * 5) Start a third thread that + * a) Waits until y has its first bit set using signal_wait_acquire. + * b) Sets the value of z to have its first bit set using signal_xor_release. + * c) Waits until z has only its last bit set with signal_wait_acquire. + * d) Sets the value y to have its last bit set using signal_xor_release. + * e) Starts over. + * f) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 6) Let all three threads run for thousands of iterations. + * + * Expected Results: For each cycle all memory operations that occurred in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; + +// thread one: waits for y == LAST_BIT, clears y, checks that every x is LAST_BIT and flips it to FIRST_BIT, then sets y to FIRST_BIT; returns when y reads -1 +void test_signal_xor_acq_rel_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // loop until y equals LAST_BIT; leave the thread if y = -1 + while ((y_val = hsa_signal_load_relaxed(y)) != LAST_BIT) + if (y_val == -1) return; + + // y is LAST_BIT here, so this xor clears it to 0 + hsa_signal_xor_acq_rel(y, LAST_BIT); + + for (ii = 0; ii < NUM_X; ++ii) { + // only last bit of every x should be set + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == LAST_BIT, "only last bit of x value should be set\n"); + + // x is LAST_BIT here; xor with both bits clears the last bit and sets the first bit + hsa_signal_xor_relaxed(x[ii], FIRST_BIT | LAST_BIT); + } + + // y is 0 here, so this xor sets only the first bit of y + hsa_signal_xor_acq_rel(y, FIRST_BIT); + } + return; +} + +// thread two: mirror image of thread one on signal z; triggers on z == FIRST_BIT, flips every x back to LAST_BIT, then sets z to LAST_BIT; returns when z reads -1 +void test_signal_xor_acq_rel_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // loop until z equals FIRST_BIT; leave the thread if z = -1 + while ((z_val = hsa_signal_load_relaxed(z)) != FIRST_BIT) + if (z_val == -1) return; + + // z is FIRST_BIT here, so this xor clears it to 0 + hsa_signal_xor_acq_rel(z, FIRST_BIT); + + for (ii = 0; ii < NUM_X; ++ii) { + // only first bit of every x should be set + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == FIRST_BIT, "only first bit of x value should be set\n"); + + // x is FIRST_BIT here; xor with both bits clears the first bit and sets the last bit + hsa_signal_xor_relaxed(x[ii], LAST_BIT | FIRST_BIT); + } + + // z is 0 here, so this xor sets only the last bit of z + hsa_signal_xor_acq_rel(z, LAST_BIT); + } + return; +} + +// thread three: relays between thread one and thread two for NUM_ITER_MEM_ORD rounds, then stores -1 to y and z so both other threads return +void test_signal_xor_acq_rel_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // wait until y equals FIRST_BIT (thread one finished its round) + hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, FIRST_BIT, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // z is expected to be LAST_BIT here; xor with both bits turns it into FIRST_BIT, which triggers thread two + hsa_signal_xor_release(z, FIRST_BIT | LAST_BIT); + + // wait until z equals LAST_BIT (thread two finished its round) + hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, LAST_BIT, UINT64_MAX, 
HSA_WAIT_STATE_BLOCKED); + + // y is FIRST_BIT here; xor with both bits turns it into LAST_BIT, which triggers thread one again + hsa_signal_xor_release(y, LAST_BIT | FIRST_BIT); + } + + // wait for t1 to finish its last round (y == FIRST_BIT) before storing -1, to avoid deadlock + while (hsa_signal_load_relaxed(y) != FIRST_BIT); + hsa_signal_store_release(y, -1); + hsa_signal_store_release(z, -1); + return; +} + +// test driver: creates x[], y and z (all with initial value LAST_BIT), runs the three thread functions in a three-entry test group, then destroys the signals and shuts the runtime down; returns 0 +int test_signal_xor_acq_rel_ordering_transitive() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize all x values with setting last bit + status = hsa_signal_create(LAST_BIT, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y with only the last bit set (the value thread one waits for) + status = hsa_signal_create(LAST_BIT, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // initialize z with only the last bit set (thread three's first xor turns it into FIRST_BIT) + status = hsa_signal_create(LAST_BIT, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(3); + + // add test func one to the test group + test_group_add(test, test_signal_xor_acq_rel_ordering_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_xor_acq_rel_ordering_t2, NULL, 1); + + // add test func three to the test_group + test_group_add(test, test_signal_xor_acq_rel_ordering_t3, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait for all test functions to finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + // destroy the signals created above + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_signal_destroy(z); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git 
a/src/core/signals/test_signal_xor_acquire_release_ordering.c b/src/core/signals/test_signal_xor_acquire_release_ordering.c new file mode 100644 index 0000000..c9f9518 --- /dev/null +++ b/src/core/signals/test_signal_xor_acquire_release_ordering.c @@ -0,0 +1,196 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_xor_acquire_release_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_xor_release API enforces + * correct memory ordering. + * + * Test Description: + * 1) Create several signals and store the handles in an array, + * denoted by x[]. All the signal values should be initialized to have + * the last bit set. + * 2) Create a control signal, denoted by y, also initialized to have only + * the last bit set. + * 3) Start one thread that + * a) Checks the value of y in a loop using hsa_signal_cas_release using + * 0 as the exchange value. + * b) When the value of y has only the last bit set, the thread stops looping, and + * c) Resets the value to 0 using signal_xor_acquire. + * d) Checks all of the x signal values with the signal_load_relaxed + * API, expecting all x values to have their last bit set. + * e) Replaces the x values by using signal_store_relaxed to set them to 0, + * and then uses signal_xor_relaxed to set the first bit only. + * f) With y already reset to 0 by the steps above, + * uses signal_xor_release to set the first bit of y only. + * g) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of y is set, not the last. + * 5) Let both threads run for several iterations. + * + * Expected Results: For each cycle all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t x[NUM_X]; + +// test func one: +// check y, if the last bit of y has been set, set y to 0, and then check if last bits of all x have been set and set the first bit of all x, and then set the first bit of y +void* test_signal_xor_acquire_release_t1(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // spin until the CAS on y returns LAST_BIT (expected value LAST_BIT, replacement value 0) + while (hsa_signal_cas_release(y, LAST_BIT, 0) != LAST_BIT); + // xor with 0 leaves the value of y unchanged; NOTE(review): presumably issued only for its acquire ordering - confirm + hsa_signal_xor_acquire(y, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal LAST_BIT + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == LAST_BIT, "the last bit of the signal value should be set\n"); + + // change x value to 0 + hsa_signal_store_relaxed(x[ii], 0); + + // set first bit of x's + hsa_signal_or_relaxed(x[ii], FIRST_BIT); + } + + // y was reset to 0 by the CAS above, so this xor sets only the first bit of y + hsa_signal_xor_release(y, FIRST_BIT); + } + return data; +} + +// test func two: +// check y, if the first bit of y has been set, set y to 0, and then check if first bits of all x have been set and set the last bit of all x, and then set the last bit of y +void* test_signal_xor_acquire_release_t2(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // spin until the CAS on y returns FIRST_BIT (expected value FIRST_BIT, replacement value 0) + while (hsa_signal_cas_release(y, FIRST_BIT, 0) != FIRST_BIT); + // xor with 0 leaves the value of y unchanged; NOTE(review): presumably issued only for its acquire ordering - confirm + hsa_signal_xor_acquire(y, 0); + + for (ii = 0; ii < NUM_X; ++ii) { + // every x value should equal FIRST_BIT (0x80000000 on 32bit machine or 0x8000000000000000 on 64bit machine) + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == FIRST_BIT, "the first bit of the signal value should be set\n"); + + // change x value to 0 + hsa_signal_store_relaxed(x[ii], 0); + + // x is 0 here, so this xor sets only the last bit of x + hsa_signal_xor_relaxed(x[ii], LAST_BIT); + } + + // y was reset to 0 by the CAS above, so this xor sets only the last bit of y + 
hsa_signal_xor_release(y, LAST_BIT); + } + return data; +} + +// test driver: creates the NUM_X x signals and the control signal y (all with initial value LAST_BIT), runs t1 and t2 in a two-entry test group, then destroys the signals and shuts the runtime down; returns 0 +int test_signal_xor_acquire_release_ordering() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // create every x with initial value LAST_BIT + status = hsa_signal_create(LAST_BIT, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // create y with initial value LAST_BIT (the value t1 spins on) + status = hsa_signal_create(LAST_BIT, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(2); + + // add test func one to the test group + test_group_add(test, test_signal_xor_acquire_release_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_xor_acquire_release_t2, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait for all test functions to finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + // destroy the signals created above + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/core/signals/test_signal_xor_acquire_release_ordering_transitive.c b/src/core/signals/test_signal_xor_acquire_release_ordering_transitive.c new file mode 100644 index 0000000..6715653 --- /dev/null +++ b/src/core/signals/test_signal_xor_acquire_release_ordering_transitive.c @@ -0,0 +1,233 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * 
Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: signal_xor_acquire_release_ordering_transitive + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_xor_acquire and + * hsa_signal_xor_release APIs enforce transitive memory + * ordering. 
+ * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. All the signal values should be initialized + * to have their last bit set. + * 2) Create two control signals, denoted by y and z. The initial + * y signal value should have its last bit set and the z signal + * value should also have its last bit set. + * 3) Start one thread that + * a) Uses signal_load_relaxed to load the value of y in a loop, stopping + * when the value has the last bit set. + * b) Uses signal_xor_acquire to clear all of the y bits. + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting each value to have the last bit set. + * d) Replaces the x values by using signal_xor_relaxed to set the first + * bit. + * e) Replaces the value of y by using signal_xor_release to set the first bit. + * f) Starts over. + * g) If it detects that the value of y is -1, it terminates. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of z is set, not the last. It also operates on signal z, + * not y. + * 5) Start a third thread that + * a) Waits until y has its first bit set using signal_wait_acquire. + * b) Sets the value of z to have its first bit set using signal_xor_release. + * c) Waits until z has only its last bit set with signal_wait_acquire. + * d) Sets the value y to have its last bit set using signal_xor_release. + * e) Starts over. + * f) After a set number of iterations the third thread should set + * both y and z signal values to -1 and terminate. + * 6) Let all three threads run for thousands of iterations. + * + * Expected Results: For each cycle all memory operations that occurred in thread one should + * be appropriately ordered in thread two after the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t z; +hsa_signal_t x[NUM_X]; + +// thread one: waits for y == LAST_BIT, clears y, checks that every x is LAST_BIT and flips it to FIRST_BIT, then sets y to FIRST_BIT; returns when y reads -1 +void test_signal_xor_acquire_release_ordering_t1(void *data) { + int ii; + while (1) { + hsa_signal_value_t y_val = 0; + // loop until y equals LAST_BIT; leave the thread if y = -1 + while ((y_val = hsa_signal_load_relaxed(y)) != LAST_BIT) + if (y_val == -1) return; + + // y is LAST_BIT here, so this xor clears it to 0 + hsa_signal_xor_acquire(y, LAST_BIT); + + for (ii = 0; ii < NUM_X; ++ii) { + // only last bit of every x should be set + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == LAST_BIT, "only last bit of x value should be set\n"); + + // x is LAST_BIT here; xor with both bits clears the last bit and sets the first bit + hsa_signal_xor_relaxed(x[ii], FIRST_BIT | LAST_BIT); + } + + // y is 0 here, so this xor sets only the first bit of y + hsa_signal_xor_release(y, FIRST_BIT); + } + return; +} + +// thread two: mirror image of thread one on signal z; triggers on z == FIRST_BIT, flips every x back to LAST_BIT, then sets z to LAST_BIT; returns when z reads -1 +void test_signal_xor_acquire_release_ordering_t2(void *data) { + int ii; + while (1) { + hsa_signal_value_t z_val = 0; + // loop until z equals FIRST_BIT; leave the thread if z = -1 + while ((z_val = hsa_signal_load_relaxed(z)) != FIRST_BIT) + if (z_val == -1) return; + + // z is FIRST_BIT here, so this xor clears it to 0 + hsa_signal_xor_acquire(z, FIRST_BIT); + + for (ii = 0; ii < NUM_X; ++ii) { + // only first bit of every x should be set + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == FIRST_BIT, "only first bit of x value should be set\n"); + + // x is FIRST_BIT here; xor with both bits clears the first bit and sets the last bit + hsa_signal_xor_relaxed(x[ii], LAST_BIT | FIRST_BIT); + } + + // z is 0 here, so this xor sets only the last bit of z + hsa_signal_xor_release(z, LAST_BIT); + } + return; +} + +// thread three: relays between thread one and thread two for NUM_ITER_MEM_ORD rounds, then stores -1 to y and z so both other threads return +void test_signal_xor_acquire_release_ordering_t3(void *data) { + int ii; + for (ii = 0; ii < NUM_ITER_MEM_ORD; ++ii) { + // wait until y equals FIRST_BIT (thread one finished its round) + hsa_signal_wait_acquire(y, HSA_SIGNAL_CONDITION_EQ, FIRST_BIT, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // z is expected to be LAST_BIT here; xor with both bits turns it into FIRST_BIT, which triggers thread two + hsa_signal_xor_release(z, FIRST_BIT | LAST_BIT); + + // wait until z equals LAST_BIT (thread two finished its round) + hsa_signal_wait_acquire(z, HSA_SIGNAL_CONDITION_EQ, LAST_BIT, 
UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // y is FIRST_BIT here; xor with both bits turns it into LAST_BIT, which triggers thread one again + hsa_signal_xor_release(y, LAST_BIT | FIRST_BIT); + } + + // wait for t1 to finish its last round (y == FIRST_BIT) before storing -1, to avoid deadlock + while (hsa_signal_load_relaxed(y) != FIRST_BIT); + hsa_signal_store_release(y, -1); + hsa_signal_store_release(z, -1); + return; +} + +// test driver: creates x[], y and z (all with initial value LAST_BIT), runs the three thread functions in a three-entry test group, then destroys the signals and shuts the runtime down; returns 0 +int test_signal_xor_acquire_release_ordering_transitive() { + hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < NUM_X; ++ii) { + // initialize all x values with setting last bit + status = hsa_signal_create(LAST_BIT, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y with only the last bit set (the value thread one waits for) + status = hsa_signal_create(LAST_BIT, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // initialize z with only the last bit set (thread three's first xor turns it into FIRST_BIT) + status = hsa_signal_create(LAST_BIT, 0, NULL, &z); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(3); + + // add test func one to the test group + test_group_add(test, test_signal_xor_acquire_release_ordering_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_xor_acquire_release_ordering_t2, NULL, 1); + + // add test func three to the test_group + test_group_add(test, test_signal_xor_acquire_release_ordering_t3, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait for all test functions to finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + // destroy the signals created above + for (ii = 0; ii < NUM_X; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_signal_destroy(z); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git 
a/src/core/signals/test_signal_xor_atomic.c b/src/core/signals/test_signal_xor_atomic.c new file mode 100644 index 0000000..bce254b --- /dev/null +++ b/src/core/signals/test_signal_xor_atomic.c @@ -0,0 +1,459 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_xor_atomic + * + * Purpose: + * Verify atomicity feature of signal operation + * + * Description: + * + * 1) Create a signal. + * Create 4 threads, that + * call hsa_signal_xor_acquire, the first to 0, the second to all bits set, + * the third with alternating 1's and 0's with 0 in the first bit and + * the fourth with alternating 1's and 0's with 1 in the first bit. + * Run the threads for millions of iterations of xors, with no + * explicit synchronization between the threads. + * + * 2) Create a signal. + * Create 4 threads, that + * call hsa_signal_xor_release, the first to 0, the second to all bits set, + * the third with alternating 1's and 0's with 0 in the first bit and + * the fourth with alternating 1's and 0's with 1 in the first bit. + * Run the threads for millions of iterations of xors, with no + * explicit synchronization between the threads. + * + * 3) Create a signal. + * Create 4 threads, that + * call hsa_signal_xor_relaxed, the first to 0, the second to all bits set, + * the third with alternating 1's and 0's with 0 in the first bit and + * the fourth with alternating 1's and 0's with 1 in the first bit. + * Run the threads for millions of iterations of xors, with no + * explicit synchronization between the threads. + * + * 4) Create a signal. + * Create 4 threads, that + * call hsa_signal_xor_acq_rel, the first to 0, the second to all bits set, + * the third with alternating 1's and 0's with 0 in the first bit and + * the fourth with alternating 1's and 0's with 1 in the first bit. + * Run the threads for millions of iterations of xors, with no + * explicit synchronization between the threads. 
+ * + */ + +#include +#include +#include +#include +#include "config.h" + +typedef struct test_group test_group; + +// Memory-order flavour of hsa_signal_xor_* that a worker thread should exercise +typedef enum OP_TYPE_T { + OP_TYPE_ACQUIRE, + OP_TYPE_ACQ_REL, + OP_TYPE_RELEASE, + OP_TYPE_RELAXED +} OP_TYPE_T; + +// Xor operands used by the four worker threads, sized by HSA_LARGE_MODEL +#ifdef HSA_LARGE_MODEL + #define NO_BITS 0x0000000000000000 + #define ALL_BITS 0xffffffffffffffff + #define ALT_BITS_1 0x5555555555555555 + #define ALT_BITS_2 0xaaaaaaaaaaaaaaaa +#else + #define NO_BITS 0x00000000 + #define ALL_BITS 0xffffffff + #define ALT_BITS_1 0x55555555 + #define ALT_BITS_2 0xaaaaaaaa +#endif + +// Define a structure to pass parameter to child function +typedef struct { + hsa_signal_t signal_handle; + int num; + OP_TYPE_T type; +} param; + +// Thread body: picks an xor operand from the thread index (num) and applies it OP_COUNT times to the shared signal, using only the xor flavour selected by type. data must point to a param. +static void child_func(void* data) { + // Here, main thread will make sure runtime is open before thread creation and close after thread func finish properly, + // within thread, we will not open runtime + param* param_ptr = (param*)data; + hsa_signal_t signal_handle = param_ptr->signal_handle; + int num = param_ptr->num; + OP_TYPE_T type = param_ptr->type; + + // Different thread behaves differently + // (initialised so that an out-of-range num can never xor an indeterminate value) + hsa_signal_value_t signal_value = NO_BITS; + switch (num) { + case 0: { + signal_value = NO_BITS; + break; + } + case 1: { + signal_value = ALL_BITS; + break; + } + case 2: { + signal_value = ALT_BITS_1; + break; + } + case 3: { + signal_value = ALT_BITS_2; + break; + } + default: + ASSERT(num < 4); + } + + int ii; + // every case ends in break so that only the requested memory-order flavour runs; without it OP_TYPE_ACQUIRE would fall through into the acq_rel, release and relaxed loops as well + switch (type) { + case OP_TYPE_ACQUIRE : { + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_xor_acquire(signal_handle, signal_value); + } + break; + } + case OP_TYPE_ACQ_REL : { + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_xor_acq_rel(signal_handle, signal_value); + } + break; + } + case OP_TYPE_RELEASE : { + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_xor_release(signal_handle, signal_value); + } + break; + } + case OP_TYPE_RELAXED : { + for (ii = 0; ii < OP_COUNT; ++ii) { + hsa_signal_xor_relaxed(signal_handle, signal_value); + } + break; + } + } + + return; +} + +/** + * + * @Brief: + * 
Implement Description #1 + * + * @Return: + * int + * + */ + +int test_signal_xor_atomic_acquire() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 0; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of 4 + test_group* group_ptr = NULL; + group_ptr = test_group_create(4); + ASSERT(NULL != group_ptr); + + int ii; + // Set parameter structure for each thread + param* param_ptr = (param*)malloc(sizeof(param)*4); + for (ii = 0; ii < 4; ++ii) { + (param_ptr+ii)->signal_handle = signal_handle; + (param_ptr+ii)->num = ii; + (param_ptr+ii)->type = OP_TYPE_ACQUIRE; + } + + // Add tests + for (ii = 0; ii < 4; ++ii) + test_group_add(group_ptr, child_func, param_ptr+ii, 1); + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running + test_group_start(group_ptr); + test_group_wait(group_ptr); + + // Here, we can use load acquire or relaxed, but store must be release to make sure signal is set to 0 + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG(NO_BITS == loaded_value || + ALL_BITS == loaded_value || + ALT_BITS_1 == loaded_value || + ALT_BITS_2 == loaded_value, + "Signal value is not what is expected!\n"); + + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return: + * int + * + */ + +int test_signal_xor_atomic_release() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 0; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of 4 + test_group* group_ptr = NULL; + group_ptr = test_group_create(4); + ASSERT(NULL != group_ptr); + + int ii; + // Set parameter structure for each thread + param* param_ptr = (param*)malloc(sizeof(param)*4); + for (ii = 0; ii < 4; ++ii) { + (param_ptr+ii)->signal_handle = signal_handle; + (param_ptr+ii)->num = ii; + (param_ptr+ii)->type = OP_TYPE_RELEASE; + } + + // Add tests + for (ii = 0; ii < 4; ++ii) + test_group_add(group_ptr, child_func, param_ptr+ii, 1); + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running + test_group_start(group_ptr); + test_group_wait(group_ptr); + + // Here, we can use load acquire or relaxed, but store must be release to make sure signal is set to 0 + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG(NO_BITS == loaded_value || + ALL_BITS == loaded_value || + ALT_BITS_1 == loaded_value || + ALT_BITS_2 == loaded_value, + "Signal value is not what is expected!\n"); + + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #3 + * + * @Return: + * int + * + */ + +int test_signal_xor_atomic_relaxed() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 0; + + // Create an new signal + status = 
hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of 4 + test_group* group_ptr = NULL; + group_ptr = test_group_create(4); + ASSERT(NULL != group_ptr); + + int ii; + // Set parameter structure for each thread + param* param_ptr = (param*)malloc(sizeof(param)*4); + for (ii = 0; ii < 4; ++ii) { + (param_ptr+ii)->signal_handle = signal_handle; + (param_ptr+ii)->num = ii; + (param_ptr+ii)->type = OP_TYPE_RELAXED; + } + + // Add tests + for (ii = 0; ii < 4; ++ii) + test_group_add(group_ptr, child_func, param_ptr+ii, 1); + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running + test_group_start(group_ptr); + test_group_wait(group_ptr); + + // Here, we can use load acquire or relaxed, but store must be release to make sure signal is set to 0 + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG(NO_BITS == loaded_value || + ALL_BITS == loaded_value || + ALT_BITS_1 == loaded_value || + ALT_BITS_2 == loaded_value, + "Signal value is not what is expected!\n"); + + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #4 + * + * @Return: + * int + * + */ + +int test_signal_xor_atomic_acq_rel() { + hsa_status_t status; + + // Open HsaRt + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_signal_t signal_handle; + hsa_signal_value_t initial_value = 0; + + // Create an new signal + status = hsa_signal_create(initial_value, 0, NULL, &signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create test group with size of 4 + test_group* group_ptr = NULL; + 
group_ptr = test_group_create(4); + ASSERT(NULL != group_ptr); + + int ii; + // Set parameter structure for each thread + param* param_ptr = (param*)malloc(sizeof(param)*4); + for (ii = 0; ii < 4; ++ii) { + (param_ptr+ii)->signal_handle = signal_handle; + (param_ptr+ii)->num = ii; + (param_ptr+ii)->type = OP_TYPE_ACQ_REL; + } + + // Add tests + for (ii = 0; ii < 4; ++ii) + test_group_add(group_ptr, child_func, param_ptr+ii, 1); + // Create threads for tests + test_group_thread_create(group_ptr); + // Set run flag to let multi-thread running + test_group_start(group_ptr); + test_group_wait(group_ptr); + + // Here, we can use load acquire or relaxed, but store must be release to make sure signal is set to 0 + hsa_signal_value_t loaded_value = hsa_signal_load_relaxed(signal_handle); + hsa_signal_store_release(signal_handle, 0); + ASSERT_MSG(NO_BITS == loaded_value || + ALL_BITS == loaded_value || + ALT_BITS_1 == loaded_value || + ALT_BITS_2 == loaded_value, + "Signal value is not what is expected!\n"); + + // Exit current test group + test_group_exit(group_ptr); + test_group_destroy(group_ptr); + + // Destroy the resource + status = hsa_signal_destroy(signal_handle); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/core/signals/test_signal_xor_release_ordering.c b/src/core/signals/test_signal_xor_release_ordering.c new file mode 100644 index 0000000..5d92c61 --- /dev/null +++ b/src/core/signals/test_signal_xor_release_ordering.c @@ -0,0 +1,194 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * + * Test Name: signal_xor_release_ordering + * Scope: Conformance + * + * Purpose: Verifies that the hsa_signal_xor_release API enforces + * correct memory ordering. + * + * Test Description: + * 1) Create 1024 signals and store the handles in an array, + * denoted by x[1024]. 
All the signal values should be initialized to have + * the last bit set. + * 2) Create a control signal, denoted by y, also initialized to have only + * the last bit set. + * 3) Start one thread that + * a) Checks the value of y in a loop using hsa_signal_cas_acquire, with the + * last bit as the expected value and 0 as the exchange value. + * b) When the value of y has only the last bit set, the thread stops looping, and + * c) Checks all of the x signal values with the signal_load_relaxed + * API, expecting all x values to have their last bit set. + * d) Replaces the x values by using signal_store_relaxed to set them to 0, + * and then uses signal_xor_relaxed to set the first bit only. + * e) Replaces the value of y, which the compare-and-swap in step a) already set to 0, + * by using signal_xor_release to set the first bit only. + * f) Starts over. + * 4) Start a second thread that does exactly the same set of operations, + * but sets the last bit for all values, not the first, and triggers when + * the first bit of y is set, not the last. + * 5) Let both threads run for 32K iterations. + * + * Expected Results: For each cycle all memory operations + * that occurred in thread one should be appropriately ordered in thread two after + * the y signal value was modified, and vice versa. 
+ */ + +#include +#include +#include +#include "config.h" + +hsa_signal_t y; +hsa_signal_t x[1024]; + +// test func one: +// check y, if the last bit of y has been set, set y to 0, and then check if last bits of all x have been set and set the first bit of all x, and then set the first bit of y +void* test_signal_xor_release_t1(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // change y to 0 if y equals to 1 + while (hsa_signal_cas_release(y, LAST_BIT, 0) != LAST_BIT); + + for (ii = 0; ii < 1024; ++ii) { + // every x value should equal to 1 + hsa_signal_value_t sig_val_t1 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t1 == LAST_BIT, "the last bit of the signal value should be set\n"); + + // change x value to 0 + hsa_signal_store_relaxed(x[ii], 0); + + // set first bit of x's + hsa_signal_or_relaxed(x[ii], FIRST_BIT); + } + + // set first bit of y + hsa_signal_xor_release(y, FIRST_BIT); + } + return data; +} + +// test func two: +// check y, if the first bit of y has been set, set y to 0, and then check if first bits of all x have been set and set the last bit of all x, and then set the last bit of y +void* test_signal_xor_release_t2(void *data) { + int ii, jj; + // repeat NUM_ITER_MEM_ORD times + for (jj = 0; jj < NUM_ITER_MEM_ORD; ++jj) { + // change y to 0 if y equals to 2 + while (hsa_signal_cas_release(y, FIRST_BIT, 0) != FIRST_BIT); + + for (ii = 0; ii < 1024; ++ii) { + // every x value should equal to 0x80000000 on 32bit machine or 0x8000000000000000 on 64bit machine + hsa_signal_value_t sig_val_t2 = hsa_signal_load_relaxed(x[ii]); + ASSERT_MSG(sig_val_t2 == FIRST_BIT, "the first bit of the signal value should be set\n"); + + // change x value to 0 + hsa_signal_store_relaxed(x[ii], 0); + + // set last bit of x's + hsa_signal_xor_relaxed(x[ii], LAST_BIT); + } + + // set last bit of y + hsa_signal_xor_release(y, LAST_BIT); + } + return data; +} + +int test_signal_xor_release_ordering() { + 
hsa_status_t status; + status = hsa_init(); + ASSERT(status == HSA_STATUS_SUCCESS); + + int ii; + for (ii = 0; ii < 1024; ++ii) { + // initialize all x with setting last bit to 1 + status = hsa_signal_create(LAST_BIT, 0, NULL, x + ii); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + // initialize y with setting last bit to 1 + status = hsa_signal_create(LAST_BIT, 0, NULL, &y); + ASSERT(status == HSA_STATUS_SUCCESS); + + // create test_group + struct test_group *test = test_group_create(2); + + // add test func one to the test group + test_group_add(test, test_signal_xor_release_t1, NULL, 1); + + // add test func two to the test_group + test_group_add(test, test_signal_xor_release_t2, NULL, 1); + + // create threads for each test + test_group_thread_create(test); + + // start test functions + test_group_start(test); + + // wait all tests functions finish + test_group_wait(test); + + // exit all tests + test_group_exit(test); + + // cleanup resources + test_group_destroy(test); + + for (ii = 0; ii < 1024; ++ii) { + status = hsa_signal_destroy(x[ii]); + ASSERT(status == HSA_STATUS_SUCCESS); + } + + status = hsa_signal_destroy(y); + ASSERT(status == HSA_STATUS_SUCCESS); + + status = hsa_shut_down(); + ASSERT(status == HSA_STATUS_SUCCESS); + return 0; +} diff --git a/src/extensions/ext_api/hsa_ext_api.c b/src/extensions/ext_api/hsa_ext_api.c new file mode 100644 index 0000000..a42339f --- /dev/null +++ b/src/extensions/ext_api/hsa_ext_api.c @@ -0,0 +1,82 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
*/

/* NOTE(review): the header name of the bare #include directive below is
 * missing from this copy of the file. Restore it from the upstream source
 * before building. */
#include
#include "hsa_ext_api.h"

/* One DEFINE_TEST per test entry point declared in hsa_ext_api.h. The
 * hsa_ext_program_add_module tests are currently disabled. */
DEFINE_TEST(hsa_ext_program_create);
DEFINE_TEST(hsa_ext_program_create_not_initialized);
DEFINE_TEST(hsa_ext_program_create_invalid_argument);
DEFINE_TEST(hsa_ext_program_destroy);
DEFINE_TEST(hsa_ext_program_destroy_not_initialized);
DEFINE_TEST(hsa_ext_program_destroy_invalid_program);
//DEFINE_TEST(hsa_ext_program_add_module);
//DEFINE_TEST(hsa_ext_program_add_module_not_initialized);
//DEFINE_TEST(hsa_ext_program_add_module_errors);
DEFINE_TEST(hsa_ext_program_finalize);
DEFINE_TEST(hsa_ext_program_finalize_not_initialized);
DEFINE_TEST(hsa_ext_program_finalize_invalid_program);
DEFINE_TEST(hsa_ext_program_finalize_invalid_isa);
DEFINE_TEST(hsa_ext_program_finalize_directive_mismatch);


/* Test-suite driver: registers every enabled ext_api test and runs them.
 * Every ADD_TEST below must have a matching DEFINE_TEST above. */
int main(int argc, char* argv[]) {
    INITIALIZE_TESTSUITE();
    ADD_TEST(hsa_ext_program_create);
    ADD_TEST(hsa_ext_program_create_not_initialized);
    ADD_TEST(hsa_ext_program_create_invalid_argument);
    ADD_TEST(hsa_ext_program_destroy);
    ADD_TEST(hsa_ext_program_destroy_not_initialized);
    ADD_TEST(hsa_ext_program_destroy_invalid_program);
    ADD_TEST(hsa_ext_program_finalize);
    ADD_TEST(hsa_ext_program_finalize_not_initialized);
    ADD_TEST(hsa_ext_program_finalize_invalid_program);
    ADD_TEST(hsa_ext_program_finalize_invalid_isa);
    ADD_TEST(hsa_ext_program_finalize_directive_mismatch);
//    ADD_TEST(hsa_ext_program_add_module);
//    ADD_TEST(hsa_ext_program_add_module_not_initialized);
//    ADD_TEST(hsa_ext_program_add_module_errors);
    RUN_TESTS();
}
diff --git a/src/extensions/ext_api/hsa_ext_api.h b/src/extensions/ext_api/hsa_ext_api.h
new file mode 100644
index 0000000..2b3df10
--- /dev/null
+++ b/src/extensions/ext_api/hsa_ext_api.h
@@ -0,0 +1,64 @@
/*
 * =============================================================================
 * HSA Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open
Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#ifndef _HSA_API_H_ +#define _HSA_API_H_ + +extern int test_hsa_ext_program_create(); +extern int test_hsa_ext_program_create_not_initialized(); +extern int test_hsa_ext_program_create_invalid_argument(); +extern int test_hsa_ext_program_destroy(); +extern int test_hsa_ext_program_destroy_not_initialized(); +extern int test_hsa_ext_program_destroy_invalid_program(); +//extern int test_hsa_ext_program_add_module(); +//extern int test_hsa_ext_program_add_module_not_initialized(); +//extern int test_hsa_ext_program_add_module_errors(); +extern int test_hsa_ext_program_finalize(); +extern int test_hsa_ext_program_finalize_not_initialized(); +extern int test_hsa_ext_program_finalize_invalid_program(); +extern int test_hsa_ext_program_finalize_invalid_isa(); +extern int test_hsa_ext_program_finalize_directive_mismatch(); + +#endif // _HSA_INIT_H_ diff --git a/src/extensions/ext_api/test_hsa_ext_program_create.c b/src/extensions/ext_api/test_hsa_ext_program_create.c new file mode 100644 index 0000000..b98883b --- /dev/null +++ b/src/extensions/ext_api/test_hsa_ext_program_create.c @@ -0,0 +1,306 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include + +/** + * + * Test Name: hsa_ext_program_create + * + * Purpose: + * Verify that if the extension API works as expected + * + * Description: + * + * 1) Iterate over all agents and create a program with the one that supports kernel dispatch. + * + * 2) Before the runtime is initialized call hsa_ext_program_create and check + * that the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. 
+ * + * 3) Call hsa_ext_program_create with an invalid machine model , + * an invalid profile, and invalid FLOAT_ROUNDING_MODE, invalid options. + * Check if the return value is HSA_STATUS_ERROR_INVALID_AGENT. + * + */ + +/** + * + * @Brief: + * Implement Description #1 + * + * @Return: + * int + * + */ + +int test_hsa_ext_program_create(){ + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. + if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_machine_model_t machine_model; + status = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &machine_model); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get machine_model.\n"); + + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + hsa_profile_t profile; + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_PROFILE, &profile); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_profile.\n"); + + hsa_default_float_rounding_mode_t default_float_rounding_mode; + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Attempting to create a program with hsa_ext_program_create API failed"); + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_default_float_rounding_mode.\n"); + + status = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &machine_model); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get machine_model.\n"); + + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + 
ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Attempting to create a program with hsa_ext_program_create API failed"); + + status = pfn.hsa_ext_program_destroy(program); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return: + * int + * + */ + +int test_hsa_ext_program_create_not_initialized() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. + if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_machine_model_t machine_model = 0; + status = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &machine_model); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get machine_model.\n"); + ASSERT(machine_model == HSA_MACHINE_MODEL_LARGE || machine_model == HSA_MACHINE_MODEL_SMALL); + + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + hsa_profile_t profile; + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_PROFILE, &profile); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_profile.\n"); + + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_default_float_rounding_mode.\n"); + + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + 
if(HSA_STATUS_ERROR_INVALID_AGENT == status){ + ASSERT_MSG(0, "The hsa_ext_program_create API returned HSA_STATUS_ERROR_INVALID_AGENT instead of HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized or is shutdown before API is called.\n"); + } else if(HSA_STATUS_ERROR_NOT_INITIALIZED == status) { + // This is the expected error + } else { + ASSERT_MSG(0, "The hsa_ext_program_create API returned an error other than HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized.\n"); + } + + status = pfn.hsa_ext_program_destroy(program); + if(HSA_STATUS_ERROR_INVALID_AGENT == status){ + ASSERT_MSG(0, "The hsa_ext_program_create API returned HSA_STATUS_ERROR_INVALID_AGENT instead of HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized or is shutdown before API is called.\n"); + } else if(HSA_STATUS_ERROR_NOT_INITIALIZED == status) { + // This is the expected error + } else { + ASSERT_MSG(0, "The hsa_ext_program_create API returned an error other than HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized.\n"); + } + + return 0; +} + +/** + * + * @Brief: + * Implement Description #3 + * + * @Return: + * int + * + */ + +int test_hsa_ext_program_create_invalid_argument(){ + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. 
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + hsa_profile_t profile; + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_PROFILE, &profile); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_profile.\n"); + + hsa_default_float_rounding_mode_t default_float_rounding_mode; + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Attempting to create a program with hsa_ext_program_create API failed"); + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_default_float_rounding_mode.\n"); + + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + + // Invalid machine_model + hsa_machine_model_t machine_model = 2; + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + if(HSA_STATUS_SUCCESS == status){ + ASSERT_MSG(1,"ext_program is created with a wrong machine_model. ERROR_INVALID_ARGUMET is expected, ERROR received.\n"); + } else if(HSA_STATUS_ERROR_INVALID_ARGUMENT == status) { + // This indicate proper behaviour + } else { + ASSERT(0); + } + + // Invalid machine_model + machine_model = -1; + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + if(HSA_STATUS_SUCCESS == status) { + ASSERT_MSG(1,"hsa_ext_program_create is created with a wrong machine_model. 
ERROR_INVALID_ARGUMET is expected, ERROR received.\n"); + } else if(HSA_STATUS_ERROR_INVALID_ARGUMENT == status) { + // This indicate proper behaviour + } else { + ASSERT(0); + } + + // Invalid profile + machine_model = 0; + hsa_profile_t invalid_profile = -1 ; + status = pfn.hsa_ext_program_create(machine_model, invalid_profile, default_float_rounding_mode, NULL, &program); + if(HSA_STATUS_SUCCESS == status){ + ASSERT_MSG(1,"hsa_ext_program_create is created with a wrong profile. ERROR_INVALID_ARGUMENT is expected, ERROR received\n"); + } else if(HSA_STATUS_ERROR_INVALID_ARGUMENT == status) { + // This indicate proper behaviour + } else { + ASSERT(0); + } + + // Invalid default_floating_mode + hsa_default_float_rounding_mode_t invalid_default_float_rounding_mode = -1 ; + status = pfn.hsa_ext_program_create(machine_model, profile,invalid_default_float_rounding_mode, NULL, &program); + if(HSA_STATUS_SUCCESS == status) { + ASSERT_MSG(1,"hsa_ext_program_create is created with a wrong default_float_rounding_mode.ERROR_INVALID_ARGUMENT is expected. ERROR received\n"); + } else if(HSA_STATUS_ERROR_INVALID_ARGUMENT == status) { + // This indicate proper behaviour + } else { + ASSERT(0); + } + + // Invalid program + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, NULL); + if(HSA_STATUS_SUCCESS == status) { + ASSERT_MSG(1,"hsa_ext_program_create is created with a NULL program .ERROR_INVALID_ARGUMENT is expected. ERROR received\n"); + } else if(HSA_STATUS_ERROR_INVALID_ARGUMENT == status) { + // This indicate proper behaviour + } else { + ASSERT(0); + } + + // Since program is never created, destroy is not needed. 
+ + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/extensions/ext_api/test_hsa_ext_program_destroy.c b/src/extensions/ext_api/test_hsa_ext_program_destroy.c new file mode 100644 index 0000000..a3d3d6b --- /dev/null +++ b/src/extensions/ext_api/test_hsa_ext_program_destroy.c @@ -0,0 +1,231 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include + +/** + * + * Test Name: hsa_ext_program_destroy + * + * Purpose: + * Verify that the hsa_ext_program_destroy extension API works as expected + * + * Description: + * + * 1) Destroy a HSAIL program, use hsa_ext_program_create to create one. + * + * 2) Before the runtime is initialized call hsa_ext_program_destroy and check + * that the return value is HSA_STATUS_ERROR_NOT_INITIALIZED. + * + * 3) Call hsa_ext_program_destroy with an invalid HSAIL program and + * check that the return value is HSA_EXT_STATUS_ERROR_INVALID_PROGRAM. + * + */ + +/** + * + * @Brief: + * Implement Description #1: create a program from the queried machine model, agent profile and default rounding mode, then destroy it and expect HSA_STATUS_SUCCESS. + * + * @Return: + * int - 0; also returns 0 early when the finalization function table cannot be obtained. + * + */ + + +int test_hsa_ext_program_destroy() { + hsa_status_t status; + + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization function pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case.
+ if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_machine_model_t machine_model; + status = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &machine_model); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get machine_model.\n"); + + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + hsa_profile_t profile; + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_PROFILE, &profile); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_profile.\n"); + + hsa_default_float_rounding_mode_t default_float_rounding_mode; + // Query the default floating-point rounding mode of the agent (no program has been created yet at this point). + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_default_float_rounding_mode.\n"); + + // machine_model, profile and default_float_rounding_mode were all queried and checked above. + // Create the program from those values, then destroy it again. + + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Attempting to create a program with hsa_ext_program_create API failed"); + + status = pfn.hsa_ext_program_destroy(program); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Attempt to destroy a HSAIL program with hsa_ext_program_destroy API failed"); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +/** + * + * @Brief: + * Implement Description #2 + * + * @Return: + * int + * + */ + +int test_hsa_ext_program_destroy_not_initialized() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization function pointer table +
hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. + if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_machine_model_t machine_model = 0; + status = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &machine_model); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get machine_model.\n"); + ASSERT(machine_model == HSA_MACHINE_MODEL_LARGE || machine_model == HSA_MACHINE_MODEL_SMALL); + + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + hsa_profile_t profile; + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_PROFILE, &profile); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_profile.\n"); + + hsa_default_float_rounding_mode_t default_float_rounding_mode; + status = hsa_agent_get_info(agent,HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &default_float_rounding_mode); + ASSERT_MSG(HSA_STATUS_SUCCESS == status, "Failed to get agent_info_default_float_rounding_mode.\n"); + + hsa_ext_program_t program; + memset(&program, 0, sizeof(hsa_ext_program_t)); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = pfn.hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program); + if(HSA_STATUS_ERROR_NOT_INITIALIZED == status) { + // This is the expected error + } else{ + ASSERT_MSG(0, "The hsa_ext_program_create API returned an error other than HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized.\n"); + } + + status = pfn.hsa_ext_program_destroy(program); + if(HSA_STATUS_ERROR_NOT_INITIALIZED == status){ + // This is the expected error + } else { + ASSERT_MSG(0, "The hsa_ext_program_destroy API returned an error other than HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime wasn't initialized. 
ERROR received.\n"); + } + return 0; +} + +/** + * + * @Brief: + * Implement Description #3: hsa_ext_program_destroy called with a program handle that was never returned by hsa_ext_program_create is expected to return HSA_EXT_STATUS_ERROR_INVALID_PROGRAM. + * + * @Return: + * int + * + */ + +int test_hsa_ext_program_destroy_invalid_program() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization function pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + // This indicates that finalization isn't + // supported. The test will succeed in that + // case. + if(HSA_STATUS_SUCCESS != status) { + return 0; + } + + hsa_ext_program_t invalid_program; + invalid_program.handle = (uint64_t)-1; + + status = pfn.hsa_ext_program_destroy(invalid_program); + ASSERT(HSA_EXT_STATUS_ERROR_INVALID_PROGRAM == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + return 0; +} diff --git a/src/extensions/ext_api/test_hsa_ext_program_finalize.c b/src/extensions/ext_api/test_hsa_ext_program_finalize.c new file mode 100644 index 0000000..7692659 --- /dev/null +++ b/src/extensions/ext_api/test_hsa_ext_program_finalize.c @@ -0,0 +1,449 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include +#include + +/** + * + * Test Name: hsa_ext_program_finalize + * + * Purpose: + * Verify if the extension program finalize API works as expected + * + * Description: + * + */ + +int test_hsa_ext_program_finalize() { + // Initialize the HSA Runtime + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization function pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a program object + hsa_ext_program_t program; + program.handle = (uint64_t)-1; + status = pfn.hsa_ext_program_create( + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, + &program); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != program.handle); + + // Load a brig module from a valid source + char module_name[256] = "no_op.brig"; + char symbol_name[256] = "&__no_op_kernel"; + hsa_ext_module_t module; + status = load_module_from_file(module_name, &module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the module to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the agent that supports kernel dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from the current agent + hsa_isa_t isa; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + // Set up a (empty) control directive + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + hsa_code_object_t code_object; + + // Finalize the program and extract the code object + status = pfn.hsa_ext_program_finalize(program, isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, 
+ control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Releasing resources + destroy_module(module); + pfn.hsa_ext_program_destroy(program); + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_ext_program_finalize_not_initialized() { + // Initialize the HSA Runtime + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization function pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a program object + hsa_ext_program_t program; + program.handle = (uint64_t)-1; + status = pfn.hsa_ext_program_create( + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, + &program); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != program.handle); + + // Load a brig module from a valid source + char module_name[256] = "no_op.brig"; + char symbol_name[256] = "&__no_op_kernel"; + hsa_ext_module_t module; + status = load_module_from_file(module_name, &module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the module to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the agent that supports kernel dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from the current agent + hsa_isa_t isa; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + // Set up a (empty) control directive + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + // Shut down the 
runtime before hsa_ext_program_finalize + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the program and extract the code object + hsa_code_object_t code_object; + status = pfn.hsa_ext_program_finalize(program, isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + // Expected to receive the following error code + ASSERT(HSA_STATUS_ERROR_NOT_INITIALIZED == status); + + // Releasing resources + destroy_module(module); + pfn.hsa_ext_program_destroy(program); + return 0; +} + +int test_hsa_ext_program_finalize_invalid_program() { + // Initialize the HSA Runtime + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization function pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a program object + hsa_ext_program_t program; + program.handle = (uint64_t)-1; + status = pfn.hsa_ext_program_create( + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, + &program); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != program.handle); + + // Load a brig module from a valid source + char module_name[256] = "no_op.brig"; + char symbol_name[256] = "&__no_op_kernel"; + hsa_ext_module_t module; + status = load_module_from_file(module_name, &module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the module to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the agent that supports kernel dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from the current agent + hsa_isa_t isa; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + 
ASSERT(HSA_STATUS_SUCCESS == status); + // Set up a (empty) control directive + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + // Create an invalid program with an invalid handle + hsa_ext_program_t invalid_program; + /*invalid_program.handle = (uint64_t)-1;*/ + invalid_program.handle = 0; + + // Use the invalid program handle to finalize the program + hsa_code_object_t code_object; + status = pfn.hsa_ext_program_finalize(invalid_program, isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + // Expected to receive the error code + ASSERT(HSA_EXT_STATUS_ERROR_INVALID_PROGRAM == status); + + // Finalize the program with valid program and extract the code object + status = pfn.hsa_ext_program_finalize(program, isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + // Expected to receive success code + ASSERT(HSA_STATUS_SUCCESS == status); + + // Releasing resources + destroy_module(module); + pfn.hsa_ext_program_destroy(program); + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int test_hsa_ext_program_finalize_invalid_isa() { + // Initialize the HSA Runtime + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization function pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a program object + hsa_ext_program_t program; + program.handle = (uint64_t)-1; + status = pfn.hsa_ext_program_create( + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, + &program); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != 
program.handle); + + // Load a brig module from a valid source + char module_name[256] = "no_op.brig"; + char symbol_name[256] = "&__no_op_kernel"; + hsa_ext_module_t module; + status = load_module_from_file(module_name, &module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the module to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the agent that supports kernel dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from the current agent + hsa_isa_t isa; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + // Set up an (empty) control directive + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + // Receives the code object produced by a successful finalization + hsa_code_object_t code_object; + + // Create an invalid isa with a zeroed handle instead of indeterminate stack contents, then try to finalize with it. NOTE(review): assumes a zero handle never names a valid ISA - confirm. + hsa_isa_t invalid_isa = {0}; + status = pfn.hsa_ext_program_finalize(program, invalid_isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + // The invalid ISA is expected to be rejected with HSA_STATUS_ERROR_INVALID_ISA + ASSERT(HSA_STATUS_ERROR_INVALID_ISA == status); + + // Finalize again with the ISA queried from the agent; this call is expected to succeed and yield the code object + status = pfn.hsa_ext_program_finalize(program, isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Releasing resources + destroy_module(module); + pfn.hsa_ext_program_destroy(program); + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} + +int
test_hsa_ext_program_finalize_directive_mismatch() { + // Checks that hsa_ext_program_finalize returns HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH for the control directives set up below. First, initialize the HSA Runtime + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization function pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a program object + hsa_ext_program_t program; + program.handle = (uint64_t)-1; + status = pfn.hsa_ext_program_create( + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, + &program); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != program.handle); + + // Load a brig module from a valid source + char module_name[256] = "no_op.brig"; + char symbol_name[256] = "&__no_op_kernel"; + hsa_ext_module_t module; + status = load_module_from_file(module_name, &module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the module to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the agent that supports kernel dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the ISA from the current agent + hsa_isa_t isa; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Only required_dim is assigned below; every other field of control_directives is left uninitialized. NOTE(review): presumably 4 is meant as an out-of-range dimension count, and the outcome may depend on the uninitialized fields - confirm. + hsa_ext_control_directives_t control_directives; + control_directives.required_dim = 4; + hsa_code_object_t code_object; + + // Finalize the program with mismatched control directives + status = pfn.hsa_ext_program_finalize(program, isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + // The finalizer is expected to report HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH + ASSERT(HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH == status); + + // Initialize the control_directives to empty +
memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + // Finalize the program and extract the code object + status = pfn.hsa_ext_program_finalize(program, isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Releasing resources + destroy_module(module); + pfn.hsa_ext_program_destroy(program); + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/extensions/finalization/hsa_finalization.c b/src/extensions/finalization/hsa_finalization.c new file mode 100644 index 0000000..46b7882 --- /dev/null +++ b/src/extensions/finalization/hsa_finalization.c @@ -0,0 +1,87 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include "hsa_finalization.h" + + +DEFINE_TEST(finalization_concurrent_finalization); +DEFINE_TEST(finalization_control_directives_max_dynamic_group_size); +DEFINE_TEST(finalization_control_directives_max_flat_grid_size); +DEFINE_TEST(finalization_control_directives_max_flat_workgroup_size); +DEFINE_TEST(finalization_control_directives_required_grid_size); +DEFINE_TEST(finalization_control_directives_required_workgroup_size); +DEFINE_TEST(finalization_control_directives_required_dim); +DEFINE_TEST(finalization_dependent_modules); +DEFINE_TEST(finalization_incompatible_module); +DEFINE_TEST(finalization_invalid_module); +DEFINE_TEST(finalization_module_already_included); +DEFINE_TEST(finalization_module_count); +DEFINE_TEST(finalization_multiple_modules); +DEFINE_TEST(finalization_out_of_resources); + + +int main(int argc, char* argv[]) +{ + INITIALIZE_TESTSUITE(); + + ADD_TEST(finalization_concurrent_finalization); + ADD_TEST(finalization_control_directives_max_dynamic_group_size); + ADD_TEST(finalization_control_directives_max_flat_grid_size); 
+ ADD_TEST(finalization_control_directives_max_flat_workgroup_size); + ADD_TEST(finalization_control_directives_required_grid_size); + ADD_TEST(finalization_control_directives_required_workgroup_size); + ADD_TEST(finalization_control_directives_required_dim); + ADD_TEST(finalization_dependent_modules); + ADD_TEST(finalization_incompatible_module); + ADD_TEST(finalization_invalid_module); + ADD_TEST(finalization_module_already_included); + ADD_TEST(finalization_module_count); + ADD_TEST(finalization_multiple_modules); + ADD_TEST(finalization_out_of_resources); + + RUN_TESTS(); +} diff --git a/src/extensions/finalization/hsa_finalization.h b/src/extensions/finalization/hsa_finalization.h new file mode 100644 index 0000000..75cc2f1 --- /dev/null +++ b/src/extensions/finalization/hsa_finalization.h @@ -0,0 +1,64 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#ifndef _HSA_FINALIZATION_H_ +#define _HSA_FINALIZATION_H_ + +extern int test_finalization_concurrent_finalization(); +extern int test_finalization_control_directives_max_dynamic_group_size(); +extern int test_finalization_control_directives_max_flat_grid_size(); +extern int test_finalization_control_directives_max_flat_workgroup_size(); +extern int test_finalization_control_directives_required_grid_size(); +extern int test_finalization_control_directives_required_workgroup_size(); +extern int test_finalization_control_directives_required_dim(); +extern int test_finalization_dependent_modules(); +extern int test_finalization_incompatible_module(); +extern int test_finalization_invalid_module(); +extern int test_finalization_module_already_included(); +extern int test_finalization_module_count(); +extern int test_finalization_multiple_modules(); +extern int test_finalization_out_of_resources(); + +#endif // _HSA_FINALIZATION_H_ diff --git a/src/extensions/finalization/test_finalization_concurrent_finalization.c 
b/src/extensions/finalization/test_finalization_concurrent_finalization.c new file mode 100644 index 0000000..208dd4b --- /dev/null +++ b/src/extensions/finalization/test_finalization_concurrent_finalization.c @@ -0,0 +1,228 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: finalization_concurrent_finalization + * Scope: Extension (Finalization) + * Support: This test assumes that the system supports the finalization + * extension and that a viable agent that supports that extension + * can be found. + * + * Purpose: Verifies that concurrent program creation and finalization + * can occur. + * + * Test Description: + * 1) Load a module from a valid source, i.e. brig file. + * 2) Create several threads that: + * a) Create a program object. + * b) Add the module to the program. + * c) Finalize the program. + * d) Extract a kernel symbol from the program. + * e) Release all associated resources (not the module). + * 3) Free the module. + * + * Expected Results: All threads should be able to successfully create a + * program and finalize it. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +typedef struct concurrent_finalization_params_s { + char* module_name; + char* symbol_name; + hsa_ext_finalizer_pfn_t* pfn; + hsa_ext_module_t module; + hsa_agent_t agent; + hsa_isa_t isa; + hsa_executable_t executable; + hsa_code_object_t code_object; + uint64_t kernel_object; +} concurrent_finalization_params_t; + +void thread_proc_finalize(void* data) { + hsa_status_t status; + concurrent_finalization_params_t* param = (concurrent_finalization_params_t*)data; + + // Create a program object + hsa_ext_program_t program; + program.handle = (uint64_t)-1; + status = param->pfn->hsa_ext_program_create( + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, + &program); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != program.handle); + + // Add the module to the program + status = param->pfn->hsa_ext_program_add_module(program, param->module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Set up a (empty) control directive + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + // Finalize the program and extract the code object + status = param->pfn->hsa_ext_program_finalize(program, + param->isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + ¶m->code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the empty executable + status = hsa_executable_create(HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, + ¶m->executable); + ASSERT(HSA_STATUS_SUCCESS == status); + // Load the code object + status = hsa_executable_load_code_object(param->executable, param->agent, param->code_object, NULL); + ASSERT(HSA_STATUS_SUCCESS == status); + // Freeze the executable; it can now be queried for symbols + status = hsa_executable_freeze(param->executable, NULL); + ASSERT(HSA_STATUS_SUCCESS == 
status); + + // Releasing resources + param->pfn->hsa_ext_program_destroy(program); + + // Find the executable symbol + symbol_record_t symbol_record; + symbol_record.module_name = param->module_name; + symbol_record.symbol.handle = (uint64_t)-1; + status = get_executable_symbols(param->executable, + param->agent, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + 1, + ¶m->symbol_name, + &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + // Query the kernel object handle + uint64_t kernel_object; + status = hsa_executable_symbol_get_info(symbol_record.symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, + ¶m->kernel_object); + ASSERT(HSA_STATUS_SUCCESS == status); +} + +int test_finalization_concurrent_finalization() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the agent that supports kernel dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load a brig module from a valid source + char module_name[256] = "no_op.brig"; + char symbol_name[256] = "&__no_op_kernel"; + hsa_ext_module_t module; + // module.handle = (uint64_t)-1; + status = load_module_from_file(module_name, &module); + ASSERT(HSA_STATUS_SUCCESS == status); + // ASSERT((uint64_t)-1 != module.handle); + + // Get the ISA from the current agent + hsa_isa_t isa; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + + const int num_threads = 16; + concurrent_finalization_params_t params[num_threads]; + int ii; + for (ii = 0; ii < num_threads; ++ii) { + params[ii].module_name = module_name; + params[ii].symbol_name = symbol_name; + params[ii].pfn = &pfn; + params[ii].module = module; + params[ii].agent = agent; + 
params[ii].isa = isa; + params[ii].executable.handle = (uint64_t)-1; + params[ii].kernel_object = 0; + } + + struct test_group *tg_concurrent_finalization = test_group_create(num_threads); + + for (ii = 0; ii < num_threads; ++ii) { + test_group_add(tg_concurrent_finalization, &thread_proc_finalize, ¶ms[ii], 1); + } + + test_group_thread_create(tg_concurrent_finalization); + test_group_start(tg_concurrent_finalization); + test_group_wait(tg_concurrent_finalization); + test_group_exit(tg_concurrent_finalization); + test_group_destroy(tg_concurrent_finalization); + + // Releasing resources + destroy_module(module); + for (ii = 0; ii < num_threads; ++ii) { + status = hsa_executable_destroy(params[ii].executable); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_object_destroy(params[ii].code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/extensions/finalization/test_finalization_control_directives_max_dynamic_group_size.c b/src/extensions/finalization/test_finalization_control_directives_max_dynamic_group_size.c new file mode 100644 index 0000000..c1be732 --- /dev/null +++ b/src/extensions/finalization/test_finalization_control_directives_max_dynamic_group_size.c @@ -0,0 +1,190 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: finalization_control_directives_max_dynamic_group_size
+ * Scope: Extension (Finalization)
+ *
+ * Test Description: Finalize the program twice, setting max_dynamic_group_size
+ * 1) to a value greater than the value specified by the maxdynamicgroupsize directive,
+ * 2) to a value less than the value specified in the maxdynamicgroupsize directive.
+ * Expected Results: The finalization will fail if the value is greater than maxdynamicgroupsize,
+ * and return the corresponding error code. Finalization will succeed if the value is less than or equal
+ * to the one specified by maxdynamicgroupsize.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_finalization_control_directives_max_dynamic_group_size() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Find the agent that supports kernel dispatch (only the resulting handle is asserted)
+    hsa_agent_t agent;
+    agent.handle = (uint64_t)-1;
+    status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent);
+    ASSERT((uint64_t)-1 != agent.handle);
+
+    // Get the finalization function pointer table
+    hsa_ext_finalizer_pfn_t pfn;
+    status = get_finalization_fnc_tbl(&pfn);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a program object
+    hsa_ext_program_t program;
+    program.handle = (uint64_t)-1;
+    status = pfn.hsa_ext_program_create(
+        HSA_MACHINE_MODEL_LARGE,
+        HSA_PROFILE_FULL,
+        HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+        NULL,
+        &program);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT((uint64_t)-1 != program.handle);
+
+    // Load a brig module from a valid source
+    char module_name[256] = "control_device.brig";
+    char symbol_name[256] = "&__control_device_kernel";
+    hsa_ext_module_t module;
+    status = load_module_from_file(module_name, &module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Add the module to the program
+    status = pfn.hsa_ext_program_add_module(program, module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Get the ISA from the current agent
+    hsa_isa_t isa;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Set up an empty (all-zero) control directives structure
+    hsa_ext_control_directives_t control_directives;
+    memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+    // Request a maximum dynamic group size of 8 KiB. NOTE(review): no control_directives_mask bit is set - confirm the field is honored
+    control_directives.max_dynamic_group_size = 8 * 1024;
+
+    // Finalize the program; this attempt is expected to fail
+    hsa_code_object_t code_object;
+    status = pfn.hsa_ext_program_finalize(program, isa,
+                                          HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                          control_directives,
+                                          NULL,
+                                          HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                          &code_object);
+    // Expected to fail with the directive-mismatch status
+    ASSERT(HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH == status);
+
+    // Lower the maximum dynamic group size to 4 KiB and finalize again
+    control_directives.max_dynamic_group_size = 4 * 1024;
+    status = pfn.hsa_ext_program_finalize(program, isa,
+                                          HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                          control_directives,
+                                          NULL,
+                                          HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                          &code_object);
+    // Expected to be finalized successfully
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create the empty executable
+    hsa_executable_t executable;
+    status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Load the code object
+    status = hsa_executable_load_code_object(executable, agent, code_object, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Freeze the executable; it can now be queried for symbols
+    status = hsa_executable_freeze(executable, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Release the module and the program object
+    destroy_module(module);
+    pfn.hsa_ext_program_destroy(program);
+
+    // Find the executable symbol
+    symbol_record_t symbol_record;
+    symbol_record.module_name = module_name;
+    symbol_record.symbol.handle = (uint64_t)-1;
+    char* symbol_name_ptr = &(symbol_name[0]);
+    status = get_executable_symbols(executable,
+                                    agent,
+                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                    1,
+                                    &symbol_name_ptr,
+                                    &symbol_record);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Query the kernel object handle
+    uint64_t kernel_object;
+    status = hsa_executable_symbol_get_info(symbol_record.symbol,
+                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                            &kernel_object);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a queue and launch the kernel through launch_kernel_no_kernarg
+    hsa_queue_t* queue;
+    status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    launch_kernel_no_kernarg(queue, kernel_object, 1);
+
+    // Release the executable, the code object and the queue
+    status = hsa_executable_destroy(executable);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_code_object_destroy(code_object);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_queue_destroy(queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/extensions/finalization/test_finalization_control_directives_max_flat_grid_size.c b/src/extensions/finalization/test_finalization_control_directives_max_flat_grid_size.c
new file mode 100644
index 0000000..b93df27
--- /dev/null
+++ b/src/extensions/finalization/test_finalization_control_directives_max_flat_grid_size.c
@@ -0,0 +1,193 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: finalization_control_directives_max_flat_grid_size
+ * Scope: Extension (Finalization)
+ *
+ * Test Description:
+ *
+ * Attempt to finalize a program with a kernel that contains a maxflatgridsize control directive
+ * 1) With a value greater than that specified by the maxflatgridsize directive.
+ * 2) With a value less than that specified by the maxflatgridsize directive.
+ *
+ * Expected Results: The finalization will fail if the value is greater than maxflatgridsize,
+ * and return the corresponding error code. Finalization will succeed if the value is less than or equal
+ * to the one specified by maxflatgridsize.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_finalization_control_directives_max_flat_grid_size() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Find the agent that supports kernel dispatch (only the resulting handle is asserted)
+    hsa_agent_t agent;
+    agent.handle = (uint64_t)-1;
+    status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent);
+    ASSERT((uint64_t)-1 != agent.handle);
+
+    // Get the finalization function pointer table
+    hsa_ext_finalizer_pfn_t pfn;
+    status = get_finalization_fnc_tbl(&pfn);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a program object
+    hsa_ext_program_t program;
+    program.handle = (uint64_t)-1;
+    status = pfn.hsa_ext_program_create(
+        HSA_MACHINE_MODEL_LARGE,
+        HSA_PROFILE_FULL,
+        HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+        NULL,
+        &program);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT((uint64_t)-1 != program.handle);
+
+    // Load a brig module from a valid source
+    char module_name[256] = "control_device.brig";
+    char symbol_name[256] = "&__control_device_kernel";
+    hsa_ext_module_t module;
+    status = load_module_from_file(module_name, &module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Add the module to the program
+    status = pfn.hsa_ext_program_add_module(program, module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Get the ISA from the current agent
+    hsa_isa_t isa;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Set up an empty (all-zero) control directives structure
+    hsa_ext_control_directives_t control_directives;
+    memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+    // Request a max flat grid size of 2000, larger than specified in the kernel. NOTE(review): no control_directives_mask bit is set - confirm the field is honored
+    control_directives.max_flat_grid_size= 2000;
+
+    // Finalize the program; this attempt is expected to fail
+    hsa_code_object_t code_object;
+    status = pfn.hsa_ext_program_finalize(program, isa,
+                                          HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                          control_directives,
+                                          NULL,
+                                          HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                          &code_object);
+    // Expected to fail with the directive-mismatch status
+    ASSERT(HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH == status);
+
+    // Lower the max flat grid size to 500 and finalize again
+    control_directives.max_flat_grid_size= 500;
+    status = pfn.hsa_ext_program_finalize(program, isa,
+                                          HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                          control_directives,
+                                          NULL,
+                                          HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                          &code_object);
+    // Expected to be finalized successfully
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create the empty executable
+    hsa_executable_t executable;
+    status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Load the code object
+    status = hsa_executable_load_code_object(executable, agent, code_object, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Freeze the executable; it can now be queried for symbols
+    status = hsa_executable_freeze(executable, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Release the module and the program object
+    destroy_module(module);
+    pfn.hsa_ext_program_destroy(program);
+
+    // Find the executable symbol
+    symbol_record_t symbol_record;
+    symbol_record.module_name = module_name;
+    symbol_record.symbol.handle = (uint64_t)-1;
+    char* symbol_name_ptr = &(symbol_name[0]);
+    status = get_executable_symbols(executable,
+                                    agent,
+                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                    1,
+                                    &symbol_name_ptr,
+                                    &symbol_record);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Query the kernel object handle
+    uint64_t kernel_object;
+    status = hsa_executable_symbol_get_info(symbol_record.symbol,
+                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                            &kernel_object);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a queue and launch the kernel through launch_kernel_no_kernarg
+    hsa_queue_t* queue;
+    status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    launch_kernel_no_kernarg(queue, kernel_object, 1);
+
+    // Release the executable, the code object and the queue
+    status = hsa_executable_destroy(executable);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_code_object_destroy(code_object);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_queue_destroy(queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/extensions/finalization/test_finalization_control_directives_max_flat_workgroup_size.c b/src/extensions/finalization/test_finalization_control_directives_max_flat_workgroup_size.c
new file mode 100644
index 0000000..67ce0e8
--- /dev/null
+++ b/src/extensions/finalization/test_finalization_control_directives_max_flat_workgroup_size.c
@@ -0,0 +1,192 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: finalization_control_directives_max_flat_workgroup_size
+ * Scope: Extension (Finalization)
+ *
+ * Test Description:
+ * Attempt to finalize a program with a kernel that contains a maxflatworkgroupsize control directive
+ * 1) With a value greater than the value specified by the maxflatworkgroupsize directive.
+ * 2) With a value less than the value specified in the maxflatworkgroupsize directive.
+ *
+ * Expected Results: The finalization will fail if the value is greater than maxflatworkgroupsize,
+ * and return the corresponding error code. Finalization will succeed if the value is less than or equal
+ * to the one specified by maxflatworkgroupsize.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_finalization_control_directives_max_flat_workgroup_size() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Find the agent that supports kernel dispatch (only the resulting handle is asserted)
+    hsa_agent_t agent;
+    agent.handle = (uint64_t)-1;
+    status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent);
+    ASSERT((uint64_t)-1 != agent.handle);
+
+    // Get the finalization function pointer table
+    hsa_ext_finalizer_pfn_t pfn;
+    status = get_finalization_fnc_tbl(&pfn);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a program object
+    hsa_ext_program_t program;
+    program.handle = (uint64_t)-1;
+    status = pfn.hsa_ext_program_create(
+        HSA_MACHINE_MODEL_LARGE,
+        HSA_PROFILE_FULL,
+        HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+        NULL,
+        &program);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT((uint64_t)-1 != program.handle);
+
+    // Load a brig module from a valid source
+    char module_name[256] = "control_device.brig";
+    char symbol_name[256] = "&__control_device_kernel";
+    hsa_ext_module_t module;
+    status = load_module_from_file(module_name, &module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Add the module to the program
+    status = pfn.hsa_ext_program_add_module(program, module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Get the ISA from the current agent
+    hsa_isa_t isa;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Set up an empty (all-zero) control directives structure
+    hsa_ext_control_directives_t control_directives;
+    memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+    // Request a maximum flat workgroup size of 200, greater than specified in the kernel. NOTE(review): no control_directives_mask bit is set - confirm the field is honored
+    control_directives.max_flat_workgroup_size= 200;
+
+    // Finalize the program; this attempt is expected to fail
+    hsa_code_object_t code_object;
+    status = pfn.hsa_ext_program_finalize(program, isa,
+                                          HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                          control_directives,
+                                          NULL,
+                                          HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                          &code_object);
+    // Expected to fail with the directive-mismatch status
+    ASSERT(HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH == status);
+
+    // Lower the maximum flat workgroup size to 50 and finalize again
+    control_directives.max_flat_workgroup_size= 50;
+    status = pfn.hsa_ext_program_finalize(program, isa,
+                                          HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                          control_directives,
+                                          NULL,
+                                          HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                          &code_object);
+    // Expected to be finalized successfully
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create the empty executable
+    hsa_executable_t executable;
+    status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Load the code object
+    status = hsa_executable_load_code_object(executable, agent, code_object, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Freeze the executable; it can now be queried for symbols
+    status = hsa_executable_freeze(executable, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Release the module and the program object
+    destroy_module(module);
+    pfn.hsa_ext_program_destroy(program);
+
+    // Find the executable symbol
+    symbol_record_t symbol_record;
+    symbol_record.module_name = module_name;
+    symbol_record.symbol.handle = (uint64_t)-1;
+    char* symbol_name_ptr = &(symbol_name[0]);
+    status = get_executable_symbols(executable,
+                                    agent,
+                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                    1,
+                                    &symbol_name_ptr,
+                                    &symbol_record);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Query the kernel object handle
+    uint64_t kernel_object;
+    status = hsa_executable_symbol_get_info(symbol_record.symbol,
+                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                            &kernel_object);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a queue and launch the kernel through launch_kernel_no_kernarg
+    hsa_queue_t* queue;
+    status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    launch_kernel_no_kernarg(queue, kernel_object, 1);
+
+    // Release the executable, the code object and the queue
+    status = hsa_executable_destroy(executable);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_code_object_destroy(code_object);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_queue_destroy(queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/extensions/finalization/test_finalization_control_directives_required_dim.c b/src/extensions/finalization/test_finalization_control_directives_required_dim.c
new file mode 100644
index 0000000..31de940
--- /dev/null
+++ b/src/extensions/finalization/test_finalization_control_directives_required_dim.c
@@ -0,0 +1,206 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: finalization_control_directives_required_dim + * Scope: Extension (Finalization) + * Test Description: + * 1) With a value greater than the value specified by the requireddim directive + * 2) Less than the value specified in the requireddim size directive + * 3) With a value equal to the requireddim directive + * + * Expected Results: Finalization will fail if the value is either greater or less + * than the value specified by requireddim. Finalization will success if the value + * is equal to the value specified by requireddim. 
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+// Finalizes control_device.brig three times with different required_dim
+// control directives (two mismatching values, then a matching one) and
+// dispatches the kernel produced by the successful finalization.
+// Returns 0 when the end of the test is reached.
+int test_finalization_control_directives_required_dim() {
+    // Bring up the HSA runtime
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Locate an agent that supports kernel dispatch; the sentinel handle
+    // must have been overwritten once the iteration returns
+    hsa_agent_t agent;
+    agent.handle = (uint64_t)-1;
+    status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent);
+    ASSERT((uint64_t)-1 != agent.handle);
+
+    // Fetch the finalization function pointer table
+    hsa_ext_finalizer_pfn_t finalizer;
+    status = get_finalization_fnc_tbl(&finalizer);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create the program object
+    hsa_ext_program_t program;
+    program.handle = (uint64_t)-1;
+    status = finalizer.hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE,
+                                              HSA_PROFILE_FULL,
+                                              HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+                                              NULL,
+                                              &program);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT((uint64_t)-1 != program.handle);
+
+    // Read the brig module from disk
+    char brig_file[256] = "control_device.brig";
+    char kernel_symbol[256] = "&__control_device_kernel";
+    hsa_ext_module_t brig_module;
+    status = load_module_from_file(brig_file, &brig_module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Attach the module to the program
+    status = finalizer.hsa_ext_program_add_module(program, brig_module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Ask the agent which ISA to finalize for
+    hsa_isa_t isa;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Start from zeroed control directives
+    hsa_ext_control_directives_t directives;
+    memset(&directives, 0, sizeof(directives));
+
+    // A required_dim larger (5) and smaller (2) than the one the test expects
+    // the kernel to specify; each finalization must be rejected as a
+    // directive mismatch
+    hsa_code_object_t code;
+    const int mismatched_dims[2] = {5, 2};
+    int attempt;
+    for (attempt = 0; attempt < 2; ++attempt) {
+        directives.required_dim = mismatched_dims[attempt];
+        status = finalizer.hsa_ext_program_finalize(program, isa,
+                                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                                    directives,
+                                                    NULL,
+                                                    HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                                    &code);
+        ASSERT(HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH == status);
+    }
+
+    // A required_dim of 3 is the matching value; this finalization must succeed
+    directives.required_dim = 3;
+    status = finalizer.hsa_ext_program_finalize(program, isa,
+                                                HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                                directives,
+                                                NULL,
+                                                HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                                &code);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Build an executable: create it empty, load the code object, then freeze
+    // it so that it can be queried for symbols
+    hsa_executable_t exe;
+    status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &exe);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_executable_load_code_object(exe, agent, code, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_executable_freeze(exe, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // The module and the program are not used past this point
+    destroy_module(brig_module);
+    finalizer.hsa_ext_program_destroy(program);
+
+    // Look up the kernel symbol in the executable
+    symbol_record_t record;
+    record.module_name = brig_file;
+    record.symbol.handle = (uint64_t)-1;
+    char* kernel_symbol_ptr = kernel_symbol;
+    status = get_executable_symbols(exe,
+                                    agent,
+                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                    1,
+                                    &kernel_symbol_ptr,
+                                    &record);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Retrieve the kernel object handle needed for the dispatch
+    uint64_t kernel_handle;
+    status = hsa_executable_symbol_get_info(record.symbol,
+                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                            &kernel_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a queue and dispatch the kernel on it
+    hsa_queue_t* dispatch_queue;
+    status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &dispatch_queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    launch_kernel_no_kernarg(dispatch_queue, kernel_handle, 1);
+
+    // Tear down: executable, code object, queue, then the runtime itself
+    status = hsa_executable_destroy(exe);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_code_object_destroy(code);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_queue_destroy(dispatch_queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/extensions/finalization/test_finalization_control_directives_required_grid_size.c b/src/extensions/finalization/test_finalization_control_directives_required_grid_size.c
new file mode 100644
index 0000000..d11474b
--- /dev/null
+++ b/src/extensions/finalization/test_finalization_control_directives_required_grid_size.c
@@ -0,0 +1,212 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+/*
+ * Test Name: finalization_control_directives_required_grid_size
+ * Scope: Extension (Finalization)
+ *
+ * Test Description: Finalize the same program three times with the
+ * required_grid_size control directive set to
+ * 1) a value greater than the one the kernel is expected to require,
+ * 2) a value less than it,
+ * 3) a value equal to it.
+ *
+ * Expected Results: Attempts 1 and 2 return
+ * HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH. Attempt 3 succeeds, and the
+ * resulting code object can be loaded into an executable and dispatched.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_finalization_control_directives_required_grid_size() {
+    // Bring up the HSA runtime
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Locate an agent that supports kernel dispatch; the sentinel handle
+    // must have been overwritten once the iteration returns
+    hsa_agent_t agent;
+    agent.handle = (uint64_t)-1;
+    status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent);
+    ASSERT((uint64_t)-1 != agent.handle);
+
+    // Fetch the finalization function pointer table
+    hsa_ext_finalizer_pfn_t finalizer;
+    status = get_finalization_fnc_tbl(&finalizer);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create the program object
+    hsa_ext_program_t program;
+    program.handle = (uint64_t)-1;
+    status = finalizer.hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE,
+                                              HSA_PROFILE_FULL,
+                                              HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+                                              NULL,
+                                              &program);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT((uint64_t)-1 != program.handle);
+
+    // Read the brig module from disk
+    char brig_file[256] = "control_device.brig";
+    char kernel_symbol[256] = "&__control_device_kernel";
+    hsa_ext_module_t brig_module;
+    status = load_module_from_file(brig_file, &brig_module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Attach the module to the program
+    status = finalizer.hsa_ext_program_add_module(program, brig_module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Ask the agent which ISA to finalize for
+    hsa_isa_t isa;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Start from zeroed control directives
+    hsa_ext_control_directives_t directives;
+    memset(&directives, 0, sizeof(directives));
+
+    // Grid sizes above (2000) and below (500) the size the test expects the
+    // kernel to require; each finalization must be rejected as a directive
+    // mismatch
+    hsa_code_object_t code;
+    const int mismatched_sizes[2] = {2000, 500};
+    int attempt;
+    int dim;
+    for (attempt = 0; attempt < 2; ++attempt) {
+        for (dim = 0; dim < 3; ++dim) {
+            directives.required_grid_size[dim] = mismatched_sizes[attempt];
+        }
+        status = finalizer.hsa_ext_program_finalize(program, isa,
+                                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                                    directives,
+                                                    NULL,
+                                                    HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                                    &code);
+        ASSERT(HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH == status);
+    }
+
+    // A required grid size of 1000 in every dimension is the matching value;
+    // this finalization must succeed
+    for (dim = 0; dim < 3; ++dim) {
+        directives.required_grid_size[dim] = 1000;
+    }
+    status = finalizer.hsa_ext_program_finalize(program, isa,
+                                                HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                                directives,
+                                                NULL,
+                                                HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                                &code);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Build an executable: create it empty, load the code object, then freeze
+    // it so that it can be queried for symbols
+    hsa_executable_t exe;
+    status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &exe);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_executable_load_code_object(exe, agent, code, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_executable_freeze(exe, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // The module and the program are not used past this point
+    destroy_module(brig_module);
+    finalizer.hsa_ext_program_destroy(program);
+
+    // Look up the kernel symbol in the executable
+    symbol_record_t record;
+    record.module_name = brig_file;
+    record.symbol.handle = (uint64_t)-1;
+    char* kernel_symbol_ptr = kernel_symbol;
+    status = get_executable_symbols(exe,
+                                    agent,
+                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                    1,
+                                    &kernel_symbol_ptr,
+                                    &record);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Retrieve the kernel object handle needed for the dispatch
+    uint64_t kernel_handle;
+    status = hsa_executable_symbol_get_info(record.symbol,
+                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                            &kernel_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a queue and dispatch the kernel on it
+    hsa_queue_t* dispatch_queue;
+    status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &dispatch_queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    launch_kernel_no_kernarg(dispatch_queue, kernel_handle, 1);
+
+    // Tear down: executable, code object, queue, then the runtime itself
+    status = hsa_executable_destroy(exe);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_code_object_destroy(code);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_queue_destroy(dispatch_queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/extensions/finalization/test_finalization_control_directives_required_workgroup_size.c b/src/extensions/finalization/test_finalization_control_directives_required_workgroup_size.c
new file mode 100644
index
0000000..de2c09d --- /dev/null +++ b/src/extensions/finalization/test_finalization_control_directives_required_workgroup_size.c @@ -0,0 +1,210 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/*
+ * Test Name: finalization_control_directives_required_workgroup_size
+ * Scope: Extension (Finalization)
+ *
+ * Test Description: Finalize the same program three times with the
+ * required_workgroup_size control directive set to
+ * 1) a value greater than the one the kernel is expected to require,
+ * 2) a value less than it,
+ * 3) a value equal to it.
+ *
+ * Expected Results: Attempts 1 and 2 return
+ * HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH. Attempt 3 succeeds, and the
+ * resulting code object can be loaded into an executable and dispatched.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+int test_finalization_control_directives_required_workgroup_size() {
+    // Bring up the HSA runtime
+    hsa_status_t status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Locate an agent that supports kernel dispatch; the sentinel handle
+    // must have been overwritten once the iteration returns
+    hsa_agent_t agent;
+    agent.handle = (uint64_t)-1;
+    status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent);
+    ASSERT((uint64_t)-1 != agent.handle);
+
+    // Fetch the finalization function pointer table
+    hsa_ext_finalizer_pfn_t finalizer;
+    status = get_finalization_fnc_tbl(&finalizer);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create the program object
+    hsa_ext_program_t program;
+    program.handle = (uint64_t)-1;
+    status = finalizer.hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE,
+                                              HSA_PROFILE_FULL,
+                                              HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+                                              NULL,
+                                              &program);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT((uint64_t)-1 != program.handle);
+
+    // Read the brig module from disk
+    char brig_file[256] = "control_device.brig";
+    char kernel_symbol[256] = "&__control_device_kernel";
+    hsa_ext_module_t brig_module;
+    status = load_module_from_file(brig_file, &brig_module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Attach the module to the program
+    status = finalizer.hsa_ext_program_add_module(program, brig_module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Ask the agent which ISA to finalize for
+    hsa_isa_t isa;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Start from zeroed control directives
+    hsa_ext_control_directives_t directives;
+    memset(&directives, 0, sizeof(directives));
+
+    // Workgroup sizes above (200) and below (50) the size the test expects the
+    // kernel to require; each finalization must be rejected as a directive
+    // mismatch
+    hsa_code_object_t code;
+    const int mismatched_sizes[2] = {200, 50};
+    int attempt;
+    for (attempt = 0; attempt < 2; ++attempt) {
+        directives.required_workgroup_size.x = mismatched_sizes[attempt];
+        directives.required_workgroup_size.y = mismatched_sizes[attempt];
+        directives.required_workgroup_size.z = mismatched_sizes[attempt];
+        status = finalizer.hsa_ext_program_finalize(program, isa,
+                                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                                    directives,
+                                                    NULL,
+                                                    HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                                    &code);
+        ASSERT(HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH == status);
+    }
+
+    // A required workgroup size of 100 in every dimension is the matching
+    // value; this finalization must succeed
+    directives.required_workgroup_size.x = 100;
+    directives.required_workgroup_size.y = 100;
+    directives.required_workgroup_size.z = 100;
+    status = finalizer.hsa_ext_program_finalize(program, isa,
+                                                HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                                directives,
+                                                NULL,
+                                                HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                                &code);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Build an executable: create it empty, load the code object, then freeze
+    // it so that it can be queried for symbols
+    hsa_executable_t exe;
+    status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &exe);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_executable_load_code_object(exe, agent, code, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_executable_freeze(exe, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // The module and the program are not used past this point
+    destroy_module(brig_module);
+    finalizer.hsa_ext_program_destroy(program);
+
+    // Look up the kernel symbol in the executable
+    symbol_record_t record;
+    record.module_name = brig_file;
+    record.symbol.handle = (uint64_t)-1;
+    char* kernel_symbol_ptr = kernel_symbol;
+    status = get_executable_symbols(exe,
+                                    agent,
+                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                    1,
+                                    &kernel_symbol_ptr,
+                                    &record);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Retrieve the kernel object handle needed for the dispatch
+    uint64_t kernel_handle;
+    status = hsa_executable_symbol_get_info(record.symbol,
+                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                            &kernel_handle);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a queue and dispatch the kernel on it
+    hsa_queue_t* dispatch_queue;
+    status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &dispatch_queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    launch_kernel_no_kernarg(dispatch_queue, kernel_handle, 1);
+
+    // Tear down: executable, code object, queue, then the runtime itself
+    status = hsa_executable_destroy(exe);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_code_object_destroy(code);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_queue_destroy(dispatch_queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/extensions/finalization/test_finalization_dependent_modules.c b/src/extensions/finalization/test_finalization_dependent_modules.c
new file mode 100644
index 0000000..7977934
--- /dev/null
+++ b/src/extensions/finalization/test_finalization_dependent_modules.c
@@ -0,0 +1,253 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: finalization_dependent_modules + * Scope: Extension (Finalization) + * Support: This test assumes that the system supports the finalization + * extension and that a viable agent that supports that extension + * can be found. + * + * Purpose: Verify that a program that has two or more dependent + * modules can be finalized and the associated code can be used + * in dispatch. + * + * Test Description: + * 1) Create a hsa_ext_program_t object. + * 2) Load a module with a defined kernel that depends on functions + * and variables defined and declared in a second module. + * 3) Add the module to the program and attempt to finalize the program. + * The finalization attempt should fail with a status of + * HSA_EXT_STATUS_ERROR_INVALID_PROGRAM. + * 4) Add the second module and attempt to finalize again. It should + * succeed. + * 5) Extract symbols (kernels) associated with each module from the finalized + * program. + * 6) Dispatch each of the kernels on a valid agent. + * + * Expected Results: On the second attempt the program should be properly + * finalized and all kernels should execute successfully.
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define BLOCK_SIZE 1024 + +int test_finalization_dependent_modules() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find an agent that supports kernel dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Create a queue for dispatch + hsa_queue_t* queue; + status = hsa_queue_create(agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find a memory region in the global segment + hsa_region_t global_region; + global_region.handle=(uint64_t)-1; + hsa_agent_iterate_regions(agent, get_global_memory_region_fine_grained, &global_region); + ASSERT((uint64_t)-1 != global_region.handle); + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t)-1; + hsa_agent_iterate_regions(agent, get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t)-1 != kernarg_region.handle); + + // Load the modules + const int num_modules = 2; + char* module_names[] = { + "depend_module1.brig", + "depend_module2.brig"}; + hsa_ext_module_t modules[num_modules]; + + // Load the 1st brig module from a file + int load_status = load_module_from_file(module_names[0], &modules[0]); + ASSERT(HSA_STATUS_SUCCESS == load_status); + + // Load the 2nd brig module from a file + load_status = load_module_from_file(module_names[1], &modules[1]); + ASSERT(HSA_STATUS_SUCCESS == load_status); + + // Finalize the executable + hsa_code_object_t code_object; + hsa_executable_t executable; + + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + status = finalize_executable(agent, + num_modules, + modules, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + 
HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the executable symbols for dispatch + char* symbol_name = "&__element_add_kernel"; + symbol_record_t symbol_record; + status = get_executable_symbols(executable, + agent, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + 1, + &symbol_name, + &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Define the argument structure + typedef struct __attribute__ ((aligned(16))) add_args_s { + uint32_t* in_one; + uint32_t* in_two; + uint32_t* out; + } add_args_t; + add_args_t add_args; + + // Allocate the kernel argument buffer from the correct region + char* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, + symbol_record.kernarg_segment_size, + (void**)(&kernarg_buffer)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Allocate the arguments + status = hsa_memory_allocate(global_region, BLOCK_SIZE * sizeof(uint32_t), (void**) &(add_args.in_one)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, BLOCK_SIZE * sizeof(uint32_t), (void**) &(add_args.in_two)); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, BLOCK_SIZE * sizeof(uint32_t), (void**) &(add_args.out)); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize the data + int ii; + for(ii=0;ii, + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: finalization_incompatible_module + * Scope: Extension (Finalization) + * Support: This test assumes that the system supports the finalization + * extension and that a viable agent that supports that extension + * can be found. + * + * Purpose: Verify that if an attempt is made to add an incompatible module + * to a program the attempt will fail with the + * HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE return code, but that the program + * can still be finalized, code objects can be extracted and executed. + * + * Test Description: + * 1) Create a hsa_ext_program_t object with a specific hsa_machine_model_t, + * i.e. HSA_MACHINE_MODEL_LARGE. + * 2) Load a hsa_ext_module_t object from a valid source, i.e. a brig module + * stored on disk (machine model large). + * 3) Add the module to the program. + * 4) Load a hsa_ext_module_t object from an invalid source, i.e. a brig module + * stored on disk that has a different machine model (small). + * 5) Attempt to add the second module to the program. The API should return + * HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE. + * 6) Finalize the program, extract valid code objects and launch a kernel. + * + * Expected Results: After the failed attempt to add the bad module, the program + * should still be usable by the test case.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "test_helper_func.h"
+
+// Adds a valid module (no_op.brig) to a large-machine-model program, checks
+// that adding no_op_small.brig is rejected with
+// HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE, then finalizes the program and
+// dispatches the no-op kernel to show the program is still usable.
+// Returns 0 when the end of the test is reached.
+int test_finalization_incompatible_module() {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Find the agent that supports kernel dispatch; the sentinel handle must
+    // have been overwritten once the iteration returns
+    hsa_agent_t agent;
+    agent.handle = (uint64_t)-1;
+    status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent);
+    ASSERT((uint64_t)-1 != agent.handle);
+
+    // Get the finalization function pointer table
+    hsa_ext_finalizer_pfn_t pfn;
+    status = get_finalization_fnc_tbl(&pfn);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create a program object
+    hsa_ext_program_t program;
+    program.handle = (uint64_t)-1;
+    status = pfn.hsa_ext_program_create(
+        HSA_MACHINE_MODEL_LARGE,
+        HSA_PROFILE_FULL,
+        HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+        NULL,
+        &program);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    ASSERT((uint64_t)-1 != program.handle);
+
+    // Load a brig module from a valid source
+    char module_name[256] = "no_op.brig";
+    char symbol_name[256] = "&__no_op_kernel";
+    hsa_ext_module_t module;
+    status = load_module_from_file(module_name, &module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Add the module to the program
+    status = pfn.hsa_ext_program_add_module(program, module);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load an incompatible brig module; loading itself is expected to work
+    char incompatible_module_name[256] = "no_op_small.brig";
+    hsa_ext_module_t incompatible_module;
+    int load_status = load_module_from_file(incompatible_module_name, &incompatible_module);
+    ASSERT(0 == load_status);
+
+    // Attempt to add the incompatible module into the program; this must be
+    // rejected
+    status = pfn.hsa_ext_program_add_module(program, incompatible_module);
+    ASSERT(HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE == status);
+
+    // Get the ISA from the current agent
+    hsa_isa_t isa;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Set up an (empty) control directive
+    hsa_ext_control_directives_t control_directives;
+    memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+
+    // Finalize the program and extract the code object; the earlier rejected
+    // add must not prevent this
+    hsa_code_object_t code_object;
+    status = pfn.hsa_ext_program_finalize(program, isa,
+                                          HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                          control_directives,
+                                          NULL,
+                                          HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                          &code_object);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create the empty executable
+    hsa_executable_t executable;
+    status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Load the code object
+    status = hsa_executable_load_code_object(executable, agent, code_object, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Freeze the executable; it can now be queried for symbols
+    status = hsa_executable_freeze(executable, NULL);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Release both loaded modules and the program. The incompatible module
+    // was loaded successfully as well, so it has to be released here too;
+    // otherwise it is leaked.
+    destroy_module(module);
+    destroy_module(incompatible_module);
+    pfn.hsa_ext_program_destroy(program);
+
+    // Find the executable symbol
+    symbol_record_t symbol_record;
+    symbol_record.module_name = module_name;
+    symbol_record.symbol.handle = (uint64_t)-1;
+    char* symbol_name_ptr = symbol_name;
+    status = get_executable_symbols(executable,
+                                    agent,
+                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                    1,
+                                    &symbol_name_ptr,
+                                    &symbol_record);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    // Query the kernel object handle
+    uint64_t kernel_object;
+    status = hsa_executable_symbol_get_info(symbol_record.symbol,
+                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                            &kernel_object);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Dispatch the kernel
+    hsa_queue_t* queue;
+    status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    launch_kernel_no_kernarg(queue, kernel_object, 1);
+
+    // Release resources
+    status = hsa_executable_destroy(executable);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_code_object_destroy(code_object);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+    status = hsa_queue_destroy(queue);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/extensions/finalization/test_finalization_invalid_module.c b/src/extensions/finalization/test_finalization_invalid_module.c
new file mode 100644
index 0000000..6832f65
--- /dev/null
+++ b/src/extensions/finalization/test_finalization_invalid_module.c
@@ -0,0 +1,195 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: finalization_invalid_module + * Scope: Extension (Finalization) + * Support: This test assumes that the system supports the finalization + * extension and that a viable agent that supports that extension + * can be found. + * + * Purpose: Verify that if an attempt is made to add an invalid module + * to a program, the attempt will fail with the + * HSA_EXT_STATUS_ERROR_INVALID_MODULE return code, but that the program + * can still be finalized, code objects can be extracted and executed. + * + * Test Description: + * 1) Create a hsa_ext_program_t object. + * 2) Load a hsa_ext_module_t object from a valid source. + * 3) Add the module to the program. + * 4) Attempt to add an invalid module (NULL) to the program. + * The API should return HSA_EXT_STATUS_ERROR_INVALID_MODULE. + * 5) Finalize the program, extract valid code objects and launch a kernel. + * + * Expected Results: After the failed attempt to add the bad module, the program + * should still be usable by the test case. 
+ * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_finalization_invalid_module() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the agent that supports kernel dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a program object + hsa_ext_program_t program; + program.handle = (uint64_t)-1; + status = pfn.hsa_ext_program_create( + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, + &program); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != program.handle); + + // Load a brig module from a valid source + char module_name[256] = "no_op.brig"; + char symbol_name[256] = "&__no_op_kernel"; + hsa_ext_module_t module; + status = load_module_from_file(module_name, &module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the module to the program + status = pfn.hsa_ext_program_add_module(program, module); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Initialize an invalid brig module + hsa_ext_module_t invalid_module; + + // Attempt to add the invalid module into the program. 
Expect to + // receive HSA_EXT_STATUS_ERROR_INVALID_MODULE error + status = pfn.hsa_ext_program_add_module(program, invalid_module); + ASSERT(HSA_EXT_STATUS_ERROR_INVALID_MODULE == status); + + // Get the ISA from the current agent + hsa_isa_t isa; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Set up a (empty) control directive + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + // Finalize the program and extract the code object + hsa_code_object_t code_object; + status = pfn.hsa_ext_program_finalize(program, isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the empty executable + hsa_executable_t executable; + status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable); + ASSERT(HSA_STATUS_SUCCESS == status); + // Load the code object + status = hsa_executable_load_code_object(executable, agent, code_object, NULL); + ASSERT(HSA_STATUS_SUCCESS == status); + // Freeze the executable; it can now be queried for symbols + status = hsa_executable_freeze(executable, NULL); + ASSERT(HSA_STATUS_SUCCESS == status); + // Releasing resources + destroy_module(module); + pfn.hsa_ext_program_destroy(program); + + // Find the executable symbol + symbol_record_t symbol_record; + symbol_record.module_name = module_name; + symbol_record.symbol.handle = (uint64_t)-1; + char* symbol_name_ptr = &(symbol_name[0]); + status = get_executable_symbols(executable, + agent, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + 1, + &symbol_name_ptr, + &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + // Query the kernel object handle + uint64_t kernel_object; + status = hsa_executable_symbol_get_info(symbol_record.symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, + &kernel_object); + 
ASSERT(HSA_STATUS_SUCCESS == status); + + // Dispatch the kernel + hsa_queue_t* queue; + status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + launch_kernel_no_kernarg(queue, kernel_object, 1); + + // Release resources + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/extensions/finalization/test_finalization_module_already_included.c b/src/extensions/finalization/test_finalization_module_already_included.c new file mode 100644 index 0000000..3fc5aeb --- /dev/null +++ b/src/extensions/finalization/test_finalization_module_already_included.c @@ -0,0 +1,194 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: finalization_module_already_included + * Scope: Extension (Finalization) + * Support: This test assumes that the system supports the finalization + * extension and that a viable agent that supports that extension + * can be found. 
+ * + * Purpose: Verify that if an attempt is made to add a module multiple + * times to a program, attempts after the first will fail with the + * HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED, but that the program + * can still be finalized, code objects can be extracted and executed. + * + * Test Description: + * 1) Create a hsa_ext_program_t object. + * 2) Load a hsa_ext_module_t object from a valid source, i.e. a brig module + * stored on disk. + * 3) Add the module to the program. + * 4) Attempt to add the module again. The API should return + * HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED. + * 5) Finalize the program, extract valid code objects and launch a kernel. + * + * Expected Results: After the failed attempt to add the module, the program + * should still be usable by the test case. + * + */ + +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +
/*
 * Adds the same module to a program twice. The second add is expected to
 * fail with HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED. The program is
 * then finalized, loaded into an executable, and its no-op kernel is
 * dispatched once.
 *
 * Returns 0 when the end of the test is reached.
 */
int test_finalization_module_already_included() {
    hsa_status_t status = hsa_init();
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Locate a kernel-dispatch agent; the sentinel handle lets the test
    // detect that the iteration found nothing.
    hsa_agent_t agent;
    agent.handle = (uint64_t)-1;
    status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent);
    ASSERT((uint64_t)-1 != agent.handle);

    // Fetch the finalization function pointer table
    hsa_ext_finalizer_pfn_t finalizer;
    status = get_finalization_fnc_tbl(&finalizer);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Create the program; the handle must be overwritten by the create call
    hsa_ext_program_t program;
    program.handle = (uint64_t)-1;
    status = finalizer.hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE,
                                              HSA_PROFILE_FULL,
                                              HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
                                              NULL,
                                              &program);
    ASSERT(HSA_STATUS_SUCCESS == status);
    ASSERT((uint64_t)-1 != program.handle);

    // Read the brig module from disk
    char module_name[256] = "no_op.brig";
    char symbol_name[256] = "&__no_op_kernel";
    hsa_ext_module_t brig_module;
    status = load_module_from_file(module_name, &brig_module);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // First add: must succeed
    status = finalizer.hsa_ext_program_add_module(program, brig_module);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Second add of the very same module: must be rejected with
    // HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED
    status = finalizer.hsa_ext_program_add_module(program, brig_module);
    ASSERT(HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED == status);

    // Ask the agent for its ISA
    hsa_isa_t isa;
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // All-zero (empty) control directives
    hsa_ext_control_directives_t directives;
    memset(&directives, 0, sizeof(directives));

    // Finalize the program into a code object
    hsa_code_object_t code_object;
    status = finalizer.hsa_ext_program_finalize(program,
                                                isa,
                                                HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
                                                directives,
                                                NULL,
                                                HSA_CODE_OBJECT_TYPE_PROGRAM,
                                                &code_object);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Build the executable: create it empty, load the code object, freeze it
    hsa_executable_t executable;
    status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable);
    ASSERT(HSA_STATUS_SUCCESS == status);
    status = hsa_executable_load_code_object(executable, agent, code_object, NULL);
    ASSERT(HSA_STATUS_SUCCESS == status);
    status = hsa_executable_freeze(executable, NULL);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // The module and program are not used past this point
    destroy_module(brig_module);
    finalizer.hsa_ext_program_destroy(program);

    // Look up the kernel symbol in the frozen executable
    symbol_record_t record;
    record.module_name = module_name;
    record.symbol.handle = (uint64_t)-1;
    char* wanted_symbol = symbol_name;
    status = get_executable_symbols(executable,
                                    agent,
                                    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
                                    1,
                                    &wanted_symbol,
                                    &record);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Retrieve the kernel object handle for the symbol
    uint64_t kernel_object;
    status = hsa_executable_symbol_get_info(record.symbol,
                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
                                            &kernel_object);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Create a queue and dispatch the kernel once
    hsa_queue_t* queue;
    status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
    ASSERT(HSA_STATUS_SUCCESS == status);
    launch_kernel_no_kernarg(queue, kernel_object, 1);

    // Tear down: executable, code object, queue, then the runtime itself
    status = hsa_executable_destroy(executable);
    ASSERT(HSA_STATUS_SUCCESS == status);
    status = hsa_code_object_destroy(code_object);
    ASSERT(HSA_STATUS_SUCCESS == status);
    status = hsa_queue_destroy(queue);
    ASSERT(HSA_STATUS_SUCCESS == status);

    status = hsa_shut_down();
    ASSERT(HSA_STATUS_SUCCESS == status);

    return 0;
}
diff --git a/src/extensions/finalization/test_finalization_module_count.c b/src/extensions/finalization/test_finalization_module_count.c new file mode 100644 index 0000000..29a8bde --- /dev/null +++ b/src/extensions/finalization/test_finalization_module_count.c @@ -0,0 +1,140 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: finalization_module_count + * Scope: Extension (Finalization) + * Support: This test assumes that the system supports the finalization + * extension and that a viable agent that supports that extension + * can be found. + * + * Purpose: Verify that as modules are added to a program, the + * hsa_ext_program_iterate_modules API will properly count the + * number of modules added. 
+ * + * Test Description: + * 1) Create a hsa_ext_program_t object. + * 2) Load several hsa_ext_module_t objects. + * 3) Add a module to the program. + * 4) Use the hsa_ext_program_iterate_modules to count the number + * of modules in the program, and verify the count. + * 5) Repeat 3 and 4 several times. + * + * Expected Results: The hsa_ext_program_iterate_modules API should + * properly iterate over all added modules. + * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +hsa_status_t callback_count_modules(hsa_ext_program_t program, + hsa_ext_module_t module, + void* data) { + int* count_modules = (int*)data; + ++(*count_modules); + return HSA_STATUS_SUCCESS; +} + +int test_finalization_module_count() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a program object + hsa_ext_program_t program; + program.handle = (uint64_t)-1; + status = pfn.hsa_ext_program_create( + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, + &program); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != program.handle); + + const int num_modules = 3; + char* module_names[] = { + "no_op.brig", + "init_data.brig", + "vector_copy.brig"}; + hsa_ext_module_t modules[num_modules]; + + int ii; + for (ii = 0; ii < num_modules; ++ii) { + // Load the brig module from a file + status = load_module_from_file(module_names[ii], &modules[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add the module to the program + status = pfn.hsa_ext_program_add_module(program, modules[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + + int count_modules = 0; + status = pfn.hsa_ext_program_iterate_modules(program, + callback_count_modules, + &count_modules); + ASSERT(ii+1 == count_modules); + } + + // 
Releasing resources + for (ii = 0; ii < num_modules; ++ii) { + destroy_module(modules[ii]); + } + pfn.hsa_ext_program_destroy(program); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/extensions/finalization/test_finalization_multiple_modules.c b/src/extensions/finalization/test_finalization_multiple_modules.c new file mode 100644 index 0000000..cc913ad --- /dev/null +++ b/src/extensions/finalization/test_finalization_multiple_modules.c @@ -0,0 +1,208 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: finalization_multiple_modules + * Scope: Extension (Finalization) + * Support: This test assumes that the system supports the finalization + * extension and that a viable agent that supports that extension + * can be found. + * + * Purpose: Verify that a program that has several independent modules + * added to it can be finalized, and that the kernels from each + * of the modules can be executed correctly. + * + * Test Description: + * 1) Create a hsa_ext_program_t object. + * 2) Load several hsa_ext_module_t objects and add them to + * the program. The modules should not have interdependencies. + * 3) Finalize the program. + * 4) Extract symbols (kernels) associated with each module from the finalized + * program. + * 5) Dispatch each of the kernels on a valid agent. + * + * Expected Results: The program should be properly finalized and all kernels + * should execute successfully. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include "test_helper_func.h" + +int test_finalization_multiple_modules() { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find the agent that supports kernel dispatch + hsa_agent_t agent; + agent.handle = (uint64_t)-1; + status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent); + ASSERT((uint64_t)-1 != agent.handle); + + // Get the finalization funtion pointer table + hsa_ext_finalizer_pfn_t pfn; + status = get_finalization_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a program object + hsa_ext_program_t program; + program.handle = (uint64_t)-1; + status = pfn.hsa_ext_program_create( + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, + &program); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT((uint64_t)-1 != program.handle); + + const int num_modules = 2; + char* module_names[] = { + "no_op.brig", + "no_op2.brig"}; + char* symbol_names[] = { + "&__no_op_kernel", + "&__no_op2_kernel"}; + hsa_ext_module_t modules[num_modules]; + + int ii; + for (ii = 0; ii < num_modules; ++ii) { + // Load the brig module from a file + // modules[ii].handle = (uint64_t)-1; + int load_status = load_module_from_file(module_names[ii], &modules[ii]); + ASSERT(0 == load_status); + // ASSERT((uint64_t)-1 != modules[ii].handle); + + // Add the module to the program + status = pfn.hsa_ext_program_add_module(program, modules[ii]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Get the ISA from the current agent + hsa_isa_t isa; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Set up a (empty) control directive + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + + // Finalize the program and extract the code object + hsa_code_object_t code_object; + status = 
pfn.hsa_ext_program_finalize(program, isa, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, + NULL, + HSA_CODE_OBJECT_TYPE_PROGRAM, + &code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + + // Create the empty executable + hsa_executable_t executable; + status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable); + ASSERT(HSA_STATUS_SUCCESS == status); + // Load the code object + status = hsa_executable_load_code_object(executable, agent, code_object, NULL); + ASSERT(HSA_STATUS_SUCCESS == status); + // Freeze the executable; it can now be queried for symbols + status = hsa_executable_freeze(executable, NULL); + ASSERT(HSA_STATUS_SUCCESS == status); + // Releasing resources + for (ii = 0; ii < num_modules; ++ii) { + destroy_module(modules[ii]); + } + pfn.hsa_ext_program_destroy(program); + + // Find the executable symbols for dispatch + symbol_record_t symbol_records[num_modules]; + for (ii = 0; ii < num_modules; ++ii) { + symbol_records[ii].module_name = module_names[ii]; + symbol_records[ii].symbol.handle = (uint64_t)-1; + } + char** symbol_names_ptr = (char**)(&symbol_names); + status = get_executable_symbols(executable, + agent, + HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + num_modules, + symbol_names_ptr, + symbol_records); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create a queue for dispatch + hsa_queue_t* queue; + status = hsa_queue_create(agent, 256, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + for (ii = 0; ii < num_modules; ++ii) { + // Query the kernel object handle + uint64_t kernel_object; + status = hsa_executable_symbol_get_info(symbol_records[ii].symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, + &kernel_object); + ASSERT(HSA_STATUS_SUCCESS == status); + // Dispatch the kernel + launch_kernel_no_kernarg(queue, kernel_object, 1); + } + + // Release resources + status = hsa_executable_destroy(executable); + 
ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/extensions/finalization/test_finalization_out_of_resources.c b/src/extensions/finalization/test_finalization_out_of_resources.c new file mode 100644 index 0000000..f8ccb09 --- /dev/null +++ b/src/extensions/finalization/test_finalization_out_of_resources.c @@ -0,0 +1,136 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include + #include "test_helper_func.h" + +
/*
 * Finalizes the same program repeatedly, keeping every code object alive.
 * The loop stops when the finalizer reports
 * HSA_STATUS_ERROR_OUT_OF_RESOURCES or when OBJECT_MAX code objects have
 * been produced. Any status other than success or out-of-resources fails
 * the test. All code objects that were created are destroyed afterwards.
 *
 * Returns 0 when the end of the test is reached.
 */
int test_finalization_out_of_resources() {
    // Initialize the HSA Runtime
    hsa_status_t status;
    status = hsa_init();
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Find the agent that supports kernel dispatch. The handle starts out as
    // a sentinel so that "no agent found" is detectable after the iteration.
    hsa_agent_t agent;
    agent.handle = (uint64_t)-1;
    status = hsa_iterate_agents(get_kernel_dispatch_agent, &agent);
    ASSERT((uint64_t)-1 != agent.handle);

    // Get the finalization function pointer table
    hsa_ext_finalizer_pfn_t pfn;
    status = get_finalization_fnc_tbl(&pfn);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Create a program object
    hsa_ext_program_t program;
    program.handle = (uint64_t)-1;
    status = pfn.hsa_ext_program_create(
        HSA_MACHINE_MODEL_LARGE,
        HSA_PROFILE_FULL,
        HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
        NULL,
        &program);
    ASSERT(HSA_STATUS_SUCCESS == status);
    ASSERT((uint64_t)-1 != program.handle);

    // Load a brig module from a valid source
    char module_name[256] = "no_op.brig";
    hsa_ext_module_t module;
    status = load_module_from_file(module_name, &module);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Add the module to the program
    status = pfn.hsa_ext_program_add_module(program, module);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Get the ISA from the current agent
    hsa_isa_t isa;
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
    ASSERT(HSA_STATUS_SUCCESS == status);

    // Set up a (empty) control directive
    hsa_ext_control_directives_t control_directives;
    memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));

    // Finalize the program over and over, keeping each code object. The
    // counter has the same unsigned type as the bound it is compared with.
    const uint32_t OBJECT_MAX = 4096;
    uint32_t object_count = 0;
    hsa_code_object_t code_object[OBJECT_MAX];
    while (object_count < OBJECT_MAX) {
        status = pfn.hsa_ext_program_finalize(program, isa,
                                              HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
                                              control_directives,
                                              NULL,
                                              HSA_CODE_OBJECT_TYPE_PROGRAM,
                                              &code_object[object_count]);
        if (HSA_STATUS_ERROR_OUT_OF_RESOURCES == status) {
            // Expected way out: the finalizer ran out of resources
            break;
        } else if (HSA_STATUS_SUCCESS == status) {
            object_count++;
            continue;
        } else {
            // Any other status is a test failure
            ASSERT(0);
        }
    }

    // Releasing resources
    destroy_module(module);
    pfn.hsa_ext_program_destroy(program);
    uint32_t i;
    for (i = 0; i < object_count; i++) {
        status = hsa_code_object_destroy(code_object[i]);
        ASSERT(HSA_STATUS_SUCCESS == status);
    }
    status = hsa_shut_down();
    ASSERT(HSA_STATUS_SUCCESS == status);

    return 0;
}
diff --git a/src/extensions/finalization/test_helper_func.c b/src/extensions/finalization/test_helper_func.c new file mode 100644 index 0000000..b6fd9d4 --- /dev/null +++ b/src/extensions/finalization/test_helper_func.c @@ -0,0 +1,117 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include "test_helper_func.h" + +void launch_kernel_no_kernarg(hsa_queue_t* queue, uint64_t kernel_object, int num_packets) { + hsa_status_t status; + + // Signal and dispatch packet + hsa_signal_t* signals = (hsa_signal_t*) malloc(sizeof(hsa_signal_t) * num_packets); + hsa_kernel_dispatch_packet_t dispatch_packet; + + int jj; + for (jj = 0; jj < num_packets; ++jj) { + status = hsa_signal_create(1, 0, NULL, &signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Get size of dispatch_packet + const size_t packet_size = sizeof(hsa_kernel_dispatch_packet_t); + + // Fill info for the default dispatch_packet + memset(&dispatch_packet, 0, packet_size); + dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.workgroup_size_x = 256; + dispatch_packet.workgroup_size_y = 1; + dispatch_packet.workgroup_size_z = 1; + dispatch_packet.grid_size_x = 256; + dispatch_packet.grid_size_y = 1; + dispatch_packet.grid_size_z = 1; + dispatch_packet.group_segment_size = 0; + dispatch_packet.private_segment_size = 0; + dispatch_packet.kernel_object = kernel_object; + dispatch_packet.kernarg_address = 0; + + // Enqueue dispatch packets + hsa_kernel_dispatch_packet_t* queue_packet; + for (jj = 0; jj < num_packets; ++jj) { + // Increment the write index of the queue + uint64_t write_index = hsa_queue_add_write_index_relaxed(queue, 1); + // Set the value fo the dispatch packet to the correct signal + dispatch_packet.completion_signal = signals[jj]; + // Obtain the address of the queue packet entry + queue_packet = (hsa_kernel_dispatch_packet_t*)(queue->base_address + write_index * packet_size); + // Copy the initialized packet to the queue packet entry + memcpy(queue_packet, &dispatch_packet, packet_size); + // 
Set the queue packet entries header.type value to HSA_PACKET_TYPE_KERNEL_DISPATCH + // This allows the command processor to process this packet. + queue_packet->header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + // Ring the doorbell + hsa_signal_store_relaxed(queue->doorbell_signal, write_index); + } + + // Wait until all dispatch packets finish executing + for (jj = 0; jj < num_packets; ++jj) { + hsa_signal_value_t value = hsa_signal_wait_relaxed(signals[jj], HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + ASSERT(0 == value); + } + + // Destroy signals + for (jj = 0; jj < num_packets; ++jj) { + status = hsa_signal_destroy(signals[jj]); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + free(signals); + + return; +} diff --git a/src/extensions/finalization/test_helper_func.h b/src/extensions/finalization/test_helper_func.h new file mode 100644 index 0000000..ef224b3 --- /dev/null +++ b/src/extensions/finalization/test_helper_func.h @@ -0,0 +1,53 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#ifndef _TEST_HELPER_FUNC_H_ +#define _TEST_HELPER_FUNC_H_ + +#include + /* Enqueues num_packets kernel dispatch packets for kernel_object on queue (kernarg_address 0, a 256-work-item grid in one 256-wide work-group), rings the doorbell after each packet, and blocks until the completion signal of every packet reads 0. */ +void launch_kernel_no_kernarg(hsa_queue_t* queue, uint64_t kernel_object, int num_packets); + +#endif // _TEST_HELPER_FUNC_H_ diff --git a/src/extensions/images/clear/hsa_image_clear.c b/src/extensions/images/clear/hsa_image_clear.c new file mode 100755 index 0000000..2a0d4de --- /dev/null +++ b/src/extensions/images/clear/hsa_image_clear.c @@ -0,0 +1,1444 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include "hsa_image_clear.h" + /* One DEFINE_IMAGE_CLEAR_TEST invocation per combination listed below. NOTE(review): the macro is not defined in this file (presumably it comes from hsa_image_clear.h); its first three arguments appear to be an image channel type, channel order and geometry, and the meaning of the trailing _ argument is not visible here - confirm against the macro definition. */ +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 2DA, _); 
+DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, LUMINANCE, 2DDEPTH, _); 
+DEFINE_IMAGE_CLEAR_TEST(SNORM_INT8, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 2D, _); 
+DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(SNORM_INT16, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 
3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 1DB, 
_); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT8, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, R, 
1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, LUMINANCE, 
2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, DEPTH, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT16, DEPTH, 2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH, 2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH_STENCIL, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH_STENCIL, 2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 1DA, _); 
+DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 1DB, _); 
+DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 2D, _); 
+DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 1DA, _); 
+DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 1DB, _); 
+DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 2D, _); 
+DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 3D, _); 
+DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 1DA, _); 
+DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 2DA, _); 
+DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, 
INTENSITY, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(HALF_FLOAT, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, A, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, A, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, A, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, A, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, A, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, A, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, R, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, R, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, R, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, R, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, R, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, R, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RG, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RG, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RG, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RG, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RG, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RG, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGX, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGX, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGX, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGX, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGX, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGX, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RA, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGBA, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGBA, 2D, 
_); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGBA, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGBA, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGBA, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, RGBA, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 1D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 2D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 3D, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 1DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 2DA, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 1DB, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, DEPTH, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, DEPTH, 2DADEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, DEPTH_STENCIL, 2DDEPTH, _); +DEFINE_IMAGE_CLEAR_TEST(FLOAT, DEPTH_STENCIL, 2DADEPTH, _); + +int main(int argc, char* argv[]) { + INITIALIZE_TESTSUITE(); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 2D, _); + 
ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 
1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT8, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 3D, _); + 
ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 1D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 2D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 3D, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(SNORM_INT16, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 2DA, _); + 
ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 
1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT8, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, R, 2DA, _); + 
ADD_IMAGE_CLEAR_TEST(UNORM_INT16, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, LUMINANCE, 2DADEPTH, _); + 
ADD_IMAGE_CLEAR_TEST(UNORM_INT16, DEPTH, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT16, DEPTH, 2DADEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH, 2DADEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH_STENCIL, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH_STENCIL, 2DADEPTH, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 1DB, _); + 
ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 1DA, _); + 
ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 2D, _); + 
ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, R, 1DB, _); + 
ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 1DA, _); + 
ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 1D, _); + 
ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 
1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 1DB, _); + 
ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 2D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 1DA, _); + 
ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 1D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 2D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 3D, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 1DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 2DA, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 1DB, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(HALF_FLOAT, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, A, 1D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, A, 2D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, A, 3D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, A, 1DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, A, 2DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, A, 1DB, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, R, 1D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, R, 2D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, R, 3D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, R, 1DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, R, 2DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, R, 1DB, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RX, 1D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RX, 2D, _); + 
ADD_IMAGE_CLEAR_TEST(FLOAT, RX, 3D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RG, 1D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RG, 2D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RG, 3D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RG, 1DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RG, 2DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RG, 1DB, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGX, 1D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGX, 2D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGX, 3D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGX, 1DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGX, 2DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGX, 1DB, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RA, 1D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RA, 2D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RA, 3D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGBA, 1D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGBA, 2D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGBA, 3D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGBA, 1DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGBA, 2DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, RGBA, 1DB, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 1D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 2D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 3D, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 1DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 2DA, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 1DB, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, DEPTH, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, DEPTH, 2DADEPTH, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, DEPTH_STENCIL, 2DDEPTH, _); + ADD_IMAGE_CLEAR_TEST(FLOAT, DEPTH_STENCIL, 2DADEPTH, _); + RUN_TESTS(); +} diff --git a/src/extensions/images/clear/hsa_image_clear.h b/src/extensions/images/clear/hsa_image_clear.h new file mode 100755 index 0000000..924c888 
--- /dev/null +++ b/src/extensions/images/clear/hsa_image_clear.h @@ -0,0 +1,770 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#ifndef _HSA_IMAGE_CLEAR_H_ +#define _HSA_IMAGE_CLEAR_H_ + +#include +#include +#include +#include + +#define DEFINE_IMAGE_CLEAR_TEST(__type__, __order__, __geometry__, __sep__) \ +DEFINE_TEST(image_clear_##__type__##__sep__##__order__##__sep__##__geometry__) + +#define ADD_IMAGE_CLEAR_TEST(__type__, __order__, __geometry__, __sep__) \ +ADD_TEST(image_clear_##__type__##__sep__##__order__##__sep__##__geometry__) + +#define IMAGE_CLEAR_TEST(__type__, __order__, __geometry__, __sep__) \ +int test_image_clear_##__type__##__sep__##__order__##__sep__##__geometry__() { \ + hsa_ext_image_format_t image_format; \ + hsa_ext_image_geometry_t image_geometry; \ + image_format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_##__type__; \ + image_format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_##__order__; \ + image_geometry = HSA_EXT_IMAGE_GEOMETRY_##__geometry__; \ + printf("\nTesting channel type = %s, channel order = %s, geometry = %s\n", #__type__, #__order__, #__geometry__); \ + test_image_clear(&image_format, image_geometry); \ + return 0; \ +} + +extern int test_image_clear(hsa_ext_image_format_t* image_format, + hsa_ext_image_geometry_t image_geometry); + +IMAGE_CLEAR_TEST(SNORM_INT8, A, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, A, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, A, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, A, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, A, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, A, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, R, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, R, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, R, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, R, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, R, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, R, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RX, 1D, _); 
+IMAGE_CLEAR_TEST(SNORM_INT8, RX, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RX, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RX, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RX, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RX, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RG, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RG, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RG, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RG, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RG, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RG, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGX, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RA, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RA, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RA, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RA, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RA, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RA, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, BGRA, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ARGB, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 
2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, ABGR, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT8, INTENSITY, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT8, LUMINANCE, 2DDEPTH, _); +IMAGE_CLEAR_TEST(SNORM_INT8, LUMINANCE, 2DADEPTH, _); +IMAGE_CLEAR_TEST(SNORM_INT16, A, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, A, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, A, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, A, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, A, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, A, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT16, R, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, R, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, R, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, R, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, R, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, R, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RX, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RX, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RX, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RX, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RX, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RX, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RG, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RG, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RG, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RG, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RG, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RG, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGX, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RA, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RA, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RA, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RA, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RA, 2DA, _); 
+IMAGE_CLEAR_TEST(SNORM_INT16, RA, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 1D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 2D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 3D, _); +IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 1DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 2DA, _); +IMAGE_CLEAR_TEST(SNORM_INT16, INTENSITY, 1DB, _); +IMAGE_CLEAR_TEST(SNORM_INT16, LUMINANCE, 2DDEPTH, _); +IMAGE_CLEAR_TEST(SNORM_INT16, LUMINANCE, 2DADEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT8, A, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, A, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, A, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, A, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, A, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, A, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, R, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, R, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, R, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, R, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, R, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, R, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RX, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RX, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RX, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RX, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RX, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RX, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RG, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RG, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RG, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RG, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RG, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RG, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGX, 1DB, _); 
+IMAGE_CLEAR_TEST(UNORM_INT8, RA, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RA, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RA, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RA, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RA, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RA, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, BGRA, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ARGB, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, ABGR, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGB, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBX, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 3D, _); 
+IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SRGBA, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, SBGRA, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT8, INTENSITY, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT8, LUMINANCE, 2DDEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT8, LUMINANCE, 2DADEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT16, A, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, A, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, A, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, A, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, A, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, A, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT16, R, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, R, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, R, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, R, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, R, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, R, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RX, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RX, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RX, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RX, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RX, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RX, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RG, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RG, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RG, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RG, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RG, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RG, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 3D, _); 
+IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGX, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RA, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RA, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RA, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RA, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RA, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RA, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 1D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 2D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 3D, _); +IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_INT16, INTENSITY, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_INT16, LUMINANCE, 2DDEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT16, LUMINANCE, 2DADEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT16, DEPTH, 2DDEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT16, DEPTH, 2DADEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH, 2DDEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH, 2DADEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH_STENCIL, 2DDEPTH, _); +IMAGE_CLEAR_TEST(UNORM_INT24, DEPTH_STENCIL, 2DADEPTH, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 1D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 2D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 3D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGB, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 1D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 2D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 3D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_555, RGBX, 1DB, _); 
+IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 1D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 2D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 3D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGB, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 1D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 2D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 3D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_565, RGBX, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 1D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 2D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 3D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGB, 1DB, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 1D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 2D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 3D, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 1DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 2DA, _); +IMAGE_CLEAR_TEST(UNORM_SHORT_101010, RGBX, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, A, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, A, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, A, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, A, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, A, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, A, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, R, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, R, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, R, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, R, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, R, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, R, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RX, 1DB, _); 
+IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RG, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGX, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RA, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, BGRA, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ARGB, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT8, ABGR, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, A, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, A, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, 
A, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, A, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, A, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, A, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, R, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, R, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, R, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, R, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, R, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, R, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RX, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RG, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGX, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RA, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT16, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, A, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, A, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, A, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, A, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, A, 2DA, _); 
+IMAGE_CLEAR_TEST(SIGNED_INT32, A, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, R, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, R, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, R, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, R, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, R, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, R, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RX, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RG, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGX, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RA, 1DB, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 1D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 2D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 3D, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(SIGNED_INT32, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, A, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 1D, _); 
+IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, R, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RX, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RG, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGX, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RA, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, BGRA, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 2D, _); 
+IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ARGB, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT8, ABGR, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, A, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, R, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RX, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RG, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGX, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 3D, 
_); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RA, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT16, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, A, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, R, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RX, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RG, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGX, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, 
RA, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RA, 1DB, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 1D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 2D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 3D, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(UNSIGNED_INT32, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, A, 1D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, A, 2D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, A, 3D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, A, 1DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, A, 2DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, A, 1DB, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, R, 1D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, R, 2D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, R, 3D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, R, 1DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, R, 2DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, R, 1DB, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 1D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 2D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 3D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 1DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 2DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RX, 1DB, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 1D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 2D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 3D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 1DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 2DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RG, 1DB, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 1D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 2D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 3D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 1DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 2DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGX, 1DB, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 1D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 2D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 3D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 1DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 2DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RA, 1DB, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 1D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, 
RGBA, 2D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 3D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 1D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 2D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 3D, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 1DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 2DA, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, INTENSITY, 1DB, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, LUMINANCE, 2DDEPTH, _); +IMAGE_CLEAR_TEST(HALF_FLOAT, LUMINANCE, 2DADEPTH, _); +IMAGE_CLEAR_TEST(FLOAT, A, 1D, _); +IMAGE_CLEAR_TEST(FLOAT, A, 2D, _); +IMAGE_CLEAR_TEST(FLOAT, A, 3D, _); +IMAGE_CLEAR_TEST(FLOAT, A, 1DA, _); +IMAGE_CLEAR_TEST(FLOAT, A, 2DA, _); +IMAGE_CLEAR_TEST(FLOAT, A, 1DB, _); +IMAGE_CLEAR_TEST(FLOAT, R, 1D, _); +IMAGE_CLEAR_TEST(FLOAT, R, 2D, _); +IMAGE_CLEAR_TEST(FLOAT, R, 3D, _); +IMAGE_CLEAR_TEST(FLOAT, R, 1DA, _); +IMAGE_CLEAR_TEST(FLOAT, R, 2DA, _); +IMAGE_CLEAR_TEST(FLOAT, R, 1DB, _); +IMAGE_CLEAR_TEST(FLOAT, RX, 1D, _); +IMAGE_CLEAR_TEST(FLOAT, RX, 2D, _); +IMAGE_CLEAR_TEST(FLOAT, RX, 3D, _); +IMAGE_CLEAR_TEST(FLOAT, RX, 1DA, _); +IMAGE_CLEAR_TEST(FLOAT, RX, 2DA, _); +IMAGE_CLEAR_TEST(FLOAT, RX, 1DB, _); +IMAGE_CLEAR_TEST(FLOAT, RG, 1D, _); +IMAGE_CLEAR_TEST(FLOAT, RG, 2D, _); +IMAGE_CLEAR_TEST(FLOAT, RG, 3D, _); +IMAGE_CLEAR_TEST(FLOAT, RG, 1DA, _); +IMAGE_CLEAR_TEST(FLOAT, RG, 2DA, _); +IMAGE_CLEAR_TEST(FLOAT, RG, 1DB, _); +IMAGE_CLEAR_TEST(FLOAT, RGX, 1D, _); +IMAGE_CLEAR_TEST(FLOAT, RGX, 2D, _); +IMAGE_CLEAR_TEST(FLOAT, RGX, 3D, _); +IMAGE_CLEAR_TEST(FLOAT, RGX, 1DA, _); +IMAGE_CLEAR_TEST(FLOAT, RGX, 2DA, _); +IMAGE_CLEAR_TEST(FLOAT, RGX, 1DB, _); +IMAGE_CLEAR_TEST(FLOAT, RA, 1D, _); +IMAGE_CLEAR_TEST(FLOAT, RA, 2D, _); +IMAGE_CLEAR_TEST(FLOAT, RA, 3D, _); +IMAGE_CLEAR_TEST(FLOAT, RA, 1DA, _); +IMAGE_CLEAR_TEST(FLOAT, RA, 2DA, _); +IMAGE_CLEAR_TEST(FLOAT, RA, 1DB, _); +IMAGE_CLEAR_TEST(FLOAT, RGBA, 1D, _); 
+IMAGE_CLEAR_TEST(FLOAT, RGBA, 2D, _); +IMAGE_CLEAR_TEST(FLOAT, RGBA, 3D, _); +IMAGE_CLEAR_TEST(FLOAT, RGBA, 1DA, _); +IMAGE_CLEAR_TEST(FLOAT, RGBA, 2DA, _); +IMAGE_CLEAR_TEST(FLOAT, RGBA, 1DB, _); +IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 1D, _); +IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 2D, _); +IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 3D, _); +IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 1DA, _); +IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 2DA, _); +IMAGE_CLEAR_TEST(FLOAT, INTENSITY, 1DB, _); +IMAGE_CLEAR_TEST(FLOAT, LUMINANCE, 2DDEPTH, _); +IMAGE_CLEAR_TEST(FLOAT, LUMINANCE, 2DADEPTH, _); +IMAGE_CLEAR_TEST(FLOAT, DEPTH, 2DDEPTH, _); +IMAGE_CLEAR_TEST(FLOAT, DEPTH, 2DADEPTH, _); +IMAGE_CLEAR_TEST(FLOAT, DEPTH_STENCIL, 2DDEPTH, _); +IMAGE_CLEAR_TEST(FLOAT, DEPTH_STENCIL, 2DADEPTH, _); + +#endif // _HSA_IMAGE_CLEAR_H_ diff --git a/src/extensions/images/clear/test_image_clear.c b/src/extensions/images/clear/test_image_clear.c new file mode 100755 index 0000000..653c163 --- /dev/null +++ b/src/extensions/images/clear/test_image_clear.c @@ -0,0 +1,544 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/* + * Test Name: image_clear_<channel_type>_<channel_order>_<geometry> + * + * Purpose: Verifies that if an image with the format and geometry specified + * by the channel type, channel order and image geometry is supported on an agent, + * it can be created and successfully cleared using the hsa_ext_image_clear API. + * + * Test Description: + * 1) Check each agent on the platform and determine if they support an image with + * channel type = <channel_type>, channel order = <channel_order> and geometry = <geometry>.
Use the + * hsa_ext_image_get_capability to do this. + * + * 2) If the agent supports the format and geometry, query the agent using + * hsa_ext_image_data_get_info to determine the maximum size of an image on the agent. + * + * 3) Use hsa_ext_image_get_info to determine the size and alignment required for the image + * backing buffer. The image permissions should be read/write and the dimensions should + * be the maximum queried from the previous step. + * + * 4) Allocate the backing buffer with hsa_memory_allocate from an appropriate memory region + * associated with the agent. + * + * 5) Create an image on the agent using the backing buffer allocated in the previous + * step. + * + * 6) Use the hsa_ext_image_clear API to clear the entire image with a set data pattern. + * + * 7) Use the hsa_ext_image_export API to export the entire image. Verify that it + * was properly cleared. + * + * 8) Use the hsa_ext_image_clear API to clear a portion of the image with a different + * set data pattern. + * + * 9) Use the hsa_ext_image_export API to export that portion of the image. Verify + * that it was properly cleared to the new values. + * + * 10) Use the hsa_ext_image_export API to export an adjacent portion of the image. Verify + * it was not cleared to the new values. + * + * 11) Repeat steps 8 to 10 until the entire image has been cleared to the new values. + * + * Expected results: The regions specified by the hsa_ext_image_clear API are the only + * ones that should be affected. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int test_image_clear(hsa_ext_image_format_t* image_format, + hsa_ext_image_geometry_t image_geometry) { + hsa_status_t status; + status = hsa_init(); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the images function pointer table + hsa_ext_image_pfn_t pfn; + status = get_image_fnc_tbl(&pfn); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Load the BRIG module + hsa_ext_module_t module; + ASSERT(0 == load_module_from_file("verify_image_region.brig", &module)); + + // Get the list of agents + struct agent_list_s agent_list; + get_agent_list(&agent_list); + + // Structures for querying format capabilities. + uint32_t capability_mask; + + // Repeat the test for each agent + int ii; + for (ii = 0; ii < agent_list.num_agents; ++ii) { + uint32_t features = 0; + status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features); + ASSERT(HSA_STATUS_SUCCESS == status); + if (!(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) { + continue; + } + + // Get format capability mask. 
+ status = pfn.hsa_ext_image_get_capability(agent_list.agents[ii], + image_geometry, + image_format, + &capability_mask); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED == capability_mask) { + printf("Image format is not supported.\n"); + continue; + } + + if (!(HSA_EXT_IMAGE_CAPABILITY_READ_ONLY & capability_mask) && + !(HSA_EXT_IMAGE_CAPABILITY_READ_WRITE & capability_mask) && + !(HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE & capability_mask)) { + printf("Image format cannot be tested.\n"); + continue; + } + + uint32_t grid_max_size; + hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_GRID_MAX_SIZE, &grid_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t grid_max_dim[3]; + hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t work_group_max_size; + hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &work_group_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t work_group_max_dim[3]; + hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_WORKGROUP_MAX_DIM, &work_group_max_dim); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Find a global memory region for the image backing buffer + hsa_region_t global_region; + global_region.handle = (uint64_t) -1; + status = hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region); + ASSERT(global_region.handle != (uint64_t) -1); + + // Find a memory region that supports kernel arguments + hsa_region_t kernarg_region; + kernarg_region.handle = (uint64_t) -1; + hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region); + ASSERT((uint64_t) -1 != kernarg_region.handle); + + // Get information regarding the image on this agent using the specified + // geometry. 
+ int image_dimension = 0; + uint32_t max_elements[3]; + uint32_t region_step[3]; + char* validation_kernel[1]; + + get_geometry_info(agent_list.agents[ii], + image_format, + image_geometry, + &image_dimension, + max_elements, + &validation_kernel[0]); + + // Adjust max_elements values + max_elements[0] = (max_elements[0] < grid_max_dim[0]) ? max_elements[0] : grid_max_dim[0]; + max_elements[1] = (max_elements[1] < grid_max_dim[1]) ? max_elements[1] : grid_max_dim[1]; + max_elements[2] = (max_elements[2] < grid_max_dim[2]) ? max_elements[2] : grid_max_dim[2]; + max_elements[0] = (max_elements[0] < 1024) ? max_elements[0] : 1024; + max_elements[1] = (max_elements[1] < 1024) ? max_elements[1] : 1024; + max_elements[2] = (max_elements[2] < 8) ? max_elements[2] : 8; + ASSERT((max_elements[0] * max_elements[1] * max_elements[2]) < grid_max_size); + + // Adjust region step size values + region_step[0] = region_step[1] = 256; + region_step[2] = 1; + region_step[0] = (region_step[0] < max_elements[0]) ? region_step[0] : max_elements[0]; + region_step[1] = (region_step[1] < max_elements[1]) ? region_step[1] : max_elements[1]; + region_step[2] = (region_step[2] < max_elements[2]) ? region_step[2] : max_elements[2]; + + // Adjust the work_group_max_dim sizes + work_group_max_dim[0] = (work_group_max_dim[0] < 16) ? work_group_max_dim[0] : 16; + work_group_max_dim[1] = (work_group_max_dim[1] < 16) ? work_group_max_dim[1] : 16; + work_group_max_dim[2] = (work_group_max_dim[2] < 1) ? work_group_max_dim[2] : 1; + work_group_max_dim[0] = (work_group_max_dim[0] < max_elements[0]) ? work_group_max_dim[0] : max_elements[0]; + work_group_max_dim[1] = (work_group_max_dim[1] < max_elements[1]) ? work_group_max_dim[1] : max_elements[1]; + work_group_max_dim[2] = (work_group_max_dim[2] < max_elements[2]) ? 
work_group_max_dim[2] : max_elements[2]; + ASSERT((work_group_max_dim[0] * work_group_max_dim[1] * work_group_max_dim[2]) <= work_group_max_size); + + // Create a queue to execute validation kernels. + hsa_queue_t *queue; + status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Finalize the executable + hsa_ext_control_directives_t control_directives; + memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t)); + hsa_code_object_t code_object; + hsa_executable_t executable; + + status = finalize_executable(agent_list.agents[ii], + 1, + &module, + HSA_MACHINE_MODEL_LARGE, + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, + HSA_CODE_OBJECT_TYPE_PROGRAM, + 0, + control_directives, + &code_object, + &executable); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Get the symbol and the symbol info + symbol_record_t symbol_record; + memset(&symbol_record, 0, sizeof(symbol_record_t)); + + status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, &validation_kernel[0], &symbol_record); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Define the validation kernel arguments + typedef struct __attribute__ ((aligned(16))) validate_args_s { + hsa_ext_image_t image; // The image handle + void* rgn_values; // The floating point pixel pattern in the specified region + void* bkg_values; // The floating point pixel pattern in the rest of the image + uint32_t* start_region; // The regions starting coords + uint32_t* end_region; // The regions ending coords + uint32_t* bits; // The channel values to compare + uint32_t* cmp_mask; // The channel values to compare + uint32_t* error; // An error field representing different rbga channel errors + } validate_args_t; + + + // Allocate the kernel argument buffer from the correct region + validate_args_t* kernarg_buffer = NULL; + status = hsa_memory_allocate(kernarg_region, + 
symbol_record.kernarg_segment_size, + (void**)(&kernarg_buffer)); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the pattern buffers + void* bg_pattern; + void* clr_pattern; + + if (image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 || + image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 || + image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32) { + status = hsa_memory_allocate(global_region, 4 * sizeof(uint32_t), (void**) &bg_pattern); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, 4 * sizeof(uint32_t), (void**) &clr_pattern); + ASSERT(HSA_STATUS_SUCCESS == status); + uint32_t *bg = (uint32_t*) bg_pattern; + uint32_t *clr = (uint32_t*) clr_pattern; + bg[0] = bg[1] = bg[2] = bg[3] = 0; + clr[0] = clr[1] = clr[2] = clr[3] = 255; + } else if (image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 || + image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 || + image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32) { + status = hsa_memory_allocate(global_region, 4 * sizeof(int32_t), (void**) &bg_pattern); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, 4 * sizeof(int32_t), (void**) &clr_pattern); + ASSERT(HSA_STATUS_SUCCESS == status); + int32_t *bg = (int32_t*) bg_pattern; + int32_t *clr = (int32_t*) clr_pattern; + bg[0] = bg[1] = bg[2] = bg[3] = 0; + clr[0] = clr[1] = clr[2] = clr[3] = 127; + } else { + status = hsa_memory_allocate(global_region, 4 * sizeof(float), (void**) &bg_pattern); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, 4 * sizeof(float), (void**) &clr_pattern); + ASSERT(HSA_STATUS_SUCCESS == status); + float *bg = (float*) bg_pattern; + float *clr = (float*) clr_pattern; + bg[0] = bg[1] = bg[2] = bg[3] = 0.0f; + clr[0] = clr[1] = clr[2] = clr[3] = 0.5f; + } + + // Create the start and end region buffers + uint32_t* 
start_region; + uint32_t* end_region; + status = hsa_memory_allocate(global_region, 3 * sizeof(uint32_t), (void**) &start_region); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, 3 * sizeof(uint32_t), (void**) &end_region); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create the error, mask and bits buffers + uint32_t* error; + uint32_t* cmp_mask; + uint32_t* bits; + status = hsa_memory_allocate(global_region, sizeof(uint32_t), (void**) &error); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(uint32_t), (void**) &cmp_mask); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_allocate(global_region, sizeof(uint32_t), (void**) &bits); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Add all of the components to the argument buffer + *cmp_mask = get_cmp_info(image_format->channel_order); + *bits = get_channel_type_bits(image_format->channel_type); + kernarg_buffer->rgn_values = clr_pattern; + kernarg_buffer->bkg_values = bg_pattern; + kernarg_buffer->start_region = start_region; + kernarg_buffer->end_region = end_region; + kernarg_buffer->bits = bits; + kernarg_buffer->cmp_mask = cmp_mask; + kernarg_buffer->error = error; + + // Determine the size and alignment for the image backing buffer + hsa_ext_image_descriptor_t image_descriptor; + image_descriptor.geometry = image_geometry; + image_descriptor.width = max_elements[0]; + image_descriptor.height = max_elements[1]; + image_descriptor.depth = max_elements[2]; + image_descriptor.array_size = 1; + image_descriptor.format.channel_type = image_format->channel_type; + image_descriptor.format.channel_order = image_format->channel_order; + + hsa_ext_image_data_info_t image_info; + hsa_access_permission_t access_permissions = HSA_ACCESS_PERMISSION_RW; + + status = pfn.hsa_ext_image_data_get_info(agent_list.agents[ii], + &image_descriptor, + access_permissions, + &image_info); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Verify 
that the memory region will correctly align the + // image data. + size_t region_align; + status = hsa_region_get_info(global_region, HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT, ®ion_align); + + ASSERT((region_align >= image_info.alignment) && (region_align % image_info.alignment == 0)); + + // Allocate the backing buffer + void* image_data; + status = hsa_memory_allocate(global_region, image_info.size, (void**) &image_data); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Create an image with the backing buffer. + status = pfn.hsa_ext_image_create(agent_list.agents[ii], + &image_descriptor, + image_data, + access_permissions, + &(kernarg_buffer->image)); + + // Create a completion signal + hsa_signal_t completion_signal; + status = hsa_signal_create(1, 0, NULL, &completion_signal); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Setup the dispatch packet + hsa_kernel_dispatch_packet_t dispatch_packet; + memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); + dispatch_packet.setup |= image_dimension << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER; + dispatch_packet.kernel_object = symbol_record.kernel_object; + dispatch_packet.group_segment_size = symbol_record.group_segment_size; + dispatch_packet.private_segment_size = symbol_record.private_segment_size; + dispatch_packet.kernarg_address = (void*) kernarg_buffer; + dispatch_packet.completion_signal = completion_signal; + dispatch_packet.workgroup_size_x = work_group_max_dim[0]; + dispatch_packet.workgroup_size_y = work_group_max_dim[1]; + dispatch_packet.workgroup_size_z = work_group_max_dim[2]; + dispatch_packet.grid_size_x = max_elements[0]; + dispatch_packet.grid_size_y = 
max_elements[1]; + dispatch_packet.grid_size_z = max_elements[2]; + + // Define the regions. + hsa_ext_image_region_t region_all; + hsa_ext_image_region_t region_partial; + region_all.offset.x = 0; + region_all.offset.y = 0; + region_all.offset.z = 0; + region_all.range.x = max_elements[0]; + region_all.range.y = max_elements[1]; + region_all.range.z = max_elements[2]; + + size_t x_offset, y_offset, z_offset; + + // Clear various regions of the image, checking for validity each iteration + for (x_offset = 0; x_offset < max_elements[0]; x_offset += region_step[0]) { + region_partial.offset.x = x_offset; + region_partial.range.x = (region_step[0] <= (max_elements[0] - x_offset)) ? region_step[0] : max_elements[0] % region_step[0]; + if (region_partial.range.x <= 0) { + continue; + } + start_region[0] = region_partial.offset.x; + end_region[0] = start_region[0] + region_partial.range.x; + + for (y_offset = 0; y_offset < max_elements[1]; y_offset += region_step[1]) { + region_partial.offset.y = y_offset; + region_partial.range.y = (region_step[1] <= (max_elements[1] - y_offset)) ? region_step[1] : max_elements[1] % region_step[1]; + if (region_partial.range.y <= 0) { + continue; + } + start_region[1] = region_partial.offset.y; + end_region[1] = start_region[1] + region_partial.range.y; + + for (z_offset = 0; z_offset < max_elements[2]; z_offset += region_step[2]) { + region_partial.offset.z = z_offset; + region_partial.range.z = (region_step[2] <= (max_elements[2] - y_offset)) ? region_step[2] : max_elements[1] % region_step[2]; + if (region_partial.range.z <= 0) { + continue; + } + start_region[2] = region_partial.offset.z; + end_region[2] = start_region[2] + region_partial.range.z; + + // Clear the entire image to the bg_pattern. + status = pfn.hsa_ext_image_clear(agent_list.agents[ii], + kernarg_buffer->image, + bg_pattern, + ®ion_all); + + ASSERT(HSA_STATUS_SUCCESS == status); + + // Clear the partial region of the image with the clr_pattern. 
+ status = pfn.hsa_ext_image_clear(agent_list.agents[ii], + kernarg_buffer->image, + clr_pattern, + ®ion_partial); + + ASSERT(HSA_STATUS_SUCCESS == status); + + *kernarg_buffer->error = 0; + + // Dispatch the kernel + enqueue_dispatch_packet(queue, &dispatch_packet); + + // Wait on the completion signal + hsa_signal_value_t value; + do { + value = hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + } while (0 != value); + + // Verify that no errors occured. + if (0 != *kernarg_buffer->error) { + printf("\nRegion: (%d,%d,%d) -> (%d,%d,%d) Error: %d\n", + start_region[0], + start_region[1], + start_region[2], + end_region[0], + end_region[1], + end_region[2], + *kernarg_buffer->error); + ASSERT(0 == *kernarg_buffer->error); + } + + printf("."); + + // Reset the signal value + hsa_signal_store_release(completion_signal, 1); + } + } + } + + // Destroy the completion signal + hsa_signal_destroy(completion_signal); + + // Destroy the image + status = pfn.hsa_ext_image_destroy(agent_list.agents[ii], kernarg_buffer->image); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the backing buffer + status = hsa_memory_free(image_data); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the kernarg_buffer + status = hsa_memory_free(kernarg_buffer); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the executable + status = hsa_executable_destroy(executable); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the code object + status = hsa_code_object_destroy(code_object); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free the clear and background patterns + status = hsa_memory_free(bg_pattern); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(clr_pattern); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Free all the utility buffers + status = hsa_memory_free(start_region); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(end_region); + ASSERT(HSA_STATUS_SUCCESS == status); + 
status = hsa_memory_free(error); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(cmp_mask); + ASSERT(HSA_STATUS_SUCCESS == status); + status = hsa_memory_free(bits); + ASSERT(HSA_STATUS_SUCCESS == status); + + // Destroy the queue + status = hsa_queue_destroy(queue); + ASSERT(HSA_STATUS_SUCCESS == status); + } + + // Shutdown HSA + status = hsa_shut_down(); + ASSERT(HSA_STATUS_SUCCESS == status); + + return 0; +} diff --git a/src/extensions/images/copy/hsa_image_copy.c b/src/extensions/images/copy/hsa_image_copy.c new file mode 100755 index 0000000..c3cc7e3 --- /dev/null +++ b/src/extensions/images/copy/hsa_image_copy.c @@ -0,0 +1,1444 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include "hsa_image_copy.h" + +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGX, 2D, _); 
+DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 2DA, _); 
+DEFINE_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT8, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 1D, 
_); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 1D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 2D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 3D, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(SNORM_INT16, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGX, 2D, _); 
+DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 
1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT8, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, R, 1DB, _); 
+DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, LUMINANCE, 2DADEPTH, _); 
+DEFINE_IMAGE_COPY_TEST(UNORM_INT16, DEPTH, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT16, DEPTH, 2DADEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT24, DEPTH, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT24, DEPTH, 2DADEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT24, DEPTH_STENCIL, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_INT24, DEPTH_STENCIL, 2DADEPTH, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 2DA, _); 
+DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RA, 2D, _); 
+DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, R, 2DA, _); 
+DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, R, 2D, _); 
+DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 2DA, _); 
+DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 1D, _); 
+DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 3D, _); 
+/* Image-copy test instantiations: one DEFINE_IMAGE_COPY_TEST(...) invocation per argument combination listed here. NOTE(review): the macro is defined outside this view; its first three arguments look like an image channel type, a channel order and an image geometry (e.g. UNSIGNED_INT16 / RG / 1DA), and the trailing `_` is passed unchanged on every line -- confirm all of this against the macro definition. main() further down passes the same argument tuples to ADD_IMAGE_COPY_TEST, so an entry added or removed here presumably needs the matching change there. */ DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 2DA, 
_); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, A, 1DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RX, 2D, 
_); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 1D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 2D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 3D, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 1DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 2DA, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 1DB, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(HALF_FLOAT, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, A, 1D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, A, 2D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, A, 3D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, A, 1DA, 
_); +DEFINE_IMAGE_COPY_TEST(FLOAT, A, 2DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, A, 1DB, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, R, 1D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, R, 2D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, R, 3D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, R, 1DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, R, 2DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, R, 1DB, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RX, 1D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RX, 2D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RX, 3D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RG, 1D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RG, 2D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RG, 3D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RG, 1DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RG, 2DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RG, 1DB, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGX, 1D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGX, 2D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGX, 3D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGX, 1DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGX, 2DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGX, 1DB, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RA, 1D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RA, 2D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RA, 3D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGBA, 1D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGBA, 2D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGBA, 3D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGBA, 1DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGBA, 2DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, RGBA, 1DB, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, INTENSITY, 1D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, INTENSITY, 2D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, INTENSITY, 3D, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, INTENSITY, 1DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, INTENSITY, 2DA, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, INTENSITY, 1DB, _); 
+DEFINE_IMAGE_COPY_TEST(FLOAT, LUMINANCE, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, LUMINANCE, 2DADEPTH, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, DEPTH, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, DEPTH, 2DADEPTH, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, DEPTH_STENCIL, 2DDEPTH, _); +DEFINE_IMAGE_COPY_TEST(FLOAT, DEPTH_STENCIL, 2DADEPTH, _); + +int main(int argc, char* argv[]) { + INITIALIZE_TESTSUITE(); + ADD_IMAGE_COPY_TEST(SNORM_INT8, A, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, R, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, R, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, A, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, A, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, A, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, A, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, A, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, A, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, R, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, R, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, R, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, R, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, R, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, R, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RX, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RX, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RX, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RG, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RG, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RG, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RA, 1D, _); + 
ADD_IMAGE_COPY_TEST(SNORM_INT8, RA, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RA, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, BGRA, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ARGB, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, ABGR, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(SNORM_INT8, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, A, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, A, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, A, 3D, _); + 
ADD_IMAGE_COPY_TEST(SNORM_INT16, A, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, A, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, A, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, R, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, R, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, R, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, R, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, R, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, R, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RX, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RX, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RX, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RG, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RG, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RG, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RA, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RA, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RA, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 1D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 2D, _); + 
ADD_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 3D, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 1DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 2DA, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 1DB, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(SNORM_INT16, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, A, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, A, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, A, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, A, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, A, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, A, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, R, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, R, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, R, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, R, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, R, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, R, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RX, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RX, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RX, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RG, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RG, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RG, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RA, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RA, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RA, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RA, 1DB, 
_); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, BGRA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ARGB, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, ABGR, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGB, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 2DA, 
_); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT8, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, A, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, A, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, A, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, A, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, A, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, A, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, R, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, R, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, R, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, R, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, R, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, R, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RX, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RX, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RX, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RG, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RG, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RG, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, 
RGX, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RA, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RA, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RA, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, DEPTH, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT16, DEPTH, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT24, DEPTH, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT24, DEPTH, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT24, DEPTH_STENCIL, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_INT24, DEPTH_STENCIL, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 
2D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 1DB, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 1D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 2D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 3D, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, A, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, A, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, A, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, A, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, A, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, A, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, R, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, R, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, R, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, R, 1DA, _); + 
ADD_IMAGE_COPY_TEST(SIGNED_INT8, R, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, R, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RX, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RX, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RX, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RG, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RG, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RG, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RA, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RA, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RA, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 3D, _); + 
ADD_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, A, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, A, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, A, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, A, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, A, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, A, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, R, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, R, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, R, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, R, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, R, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, R, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RX, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RX, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RX, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RG, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RG, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RG, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RA, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RA, 2D, _); + 
ADD_IMAGE_COPY_TEST(SIGNED_INT16, RA, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, A, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, A, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, A, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, A, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, A, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, A, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, R, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, R, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, R, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, R, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, R, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, R, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RX, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RX, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RX, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RG, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RG, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RG, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RA, 1D, _); + 
ADD_IMAGE_COPY_TEST(SIGNED_INT32, RA, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RA, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, A, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, R, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 2DA, _); + 
ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, A, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 2D, _); + 
ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, R, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 2DA, _); + 
ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, A, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, R, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, A, 1D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, A, 2D, _); + 
ADD_IMAGE_COPY_TEST(HALF_FLOAT, A, 3D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, A, 1DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, A, 2DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, A, 1DB, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, R, 1D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, R, 2D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, R, 3D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, R, 1DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, R, 2DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, R, 1DB, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RX, 1D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RX, 2D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RX, 3D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RG, 1D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RG, 2D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RG, 3D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RA, 1D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RA, 2D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RA, 3D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 1DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 1D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 2D, _); + 
ADD_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 3D, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 1DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 2DA, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 1DB, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(HALF_FLOAT, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(FLOAT, A, 1D, _); + ADD_IMAGE_COPY_TEST(FLOAT, A, 2D, _); + ADD_IMAGE_COPY_TEST(FLOAT, A, 3D, _); + ADD_IMAGE_COPY_TEST(FLOAT, A, 1DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, A, 2DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, A, 1DB, _); + ADD_IMAGE_COPY_TEST(FLOAT, R, 1D, _); + ADD_IMAGE_COPY_TEST(FLOAT, R, 2D, _); + ADD_IMAGE_COPY_TEST(FLOAT, R, 3D, _); + ADD_IMAGE_COPY_TEST(FLOAT, R, 1DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, R, 2DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, R, 1DB, _); + ADD_IMAGE_COPY_TEST(FLOAT, RX, 1D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RX, 2D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RX, 3D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RX, 1DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, RX, 2DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, RX, 1DB, _); + ADD_IMAGE_COPY_TEST(FLOAT, RG, 1D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RG, 2D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RG, 3D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RG, 1DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, RG, 2DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, RG, 1DB, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGX, 1D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGX, 2D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGX, 3D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGX, 1DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGX, 2DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGX, 1DB, _); + ADD_IMAGE_COPY_TEST(FLOAT, RA, 1D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RA, 2D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RA, 3D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RA, 1DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, RA, 2DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, RA, 1DB, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGBA, 1D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGBA, 2D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGBA, 3D, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGBA, 1DA, _); + 
ADD_IMAGE_COPY_TEST(FLOAT, RGBA, 2DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, RGBA, 1DB, _); + ADD_IMAGE_COPY_TEST(FLOAT, INTENSITY, 1D, _); + ADD_IMAGE_COPY_TEST(FLOAT, INTENSITY, 2D, _); + ADD_IMAGE_COPY_TEST(FLOAT, INTENSITY, 3D, _); + ADD_IMAGE_COPY_TEST(FLOAT, INTENSITY, 1DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, INTENSITY, 2DA, _); + ADD_IMAGE_COPY_TEST(FLOAT, INTENSITY, 1DB, _); + ADD_IMAGE_COPY_TEST(FLOAT, LUMINANCE, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(FLOAT, LUMINANCE, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(FLOAT, DEPTH, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(FLOAT, DEPTH, 2DADEPTH, _); + ADD_IMAGE_COPY_TEST(FLOAT, DEPTH_STENCIL, 2DDEPTH, _); + ADD_IMAGE_COPY_TEST(FLOAT, DEPTH_STENCIL, 2DADEPTH, _); + RUN_TESTS(); +} diff --git a/src/extensions/images/copy/hsa_image_copy.h b/src/extensions/images/copy/hsa_image_copy.h new file mode 100755 index 0000000..a69140b --- /dev/null +++ b/src/extensions/images/copy/hsa_image_copy.h @@ -0,0 +1,770 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#ifndef _HSA_IMAGE_COPY_H_ +#define _HSA_IMAGE_COPY_H_ + +#include +#include +#include +#include + +#define DEFINE_IMAGE_COPY_TEST(__type__, __order__, __geometry__, __sep__) \ +DEFINE_TEST(image_copy_##__type__##__sep__##__order__##__sep__##__geometry__) + +#define ADD_IMAGE_COPY_TEST(__type__, __order__, __geometry__, __sep__) \ +ADD_TEST(image_copy_##__type__##__sep__##__order__##__sep__##__geometry__) + +#define IMAGE_COPY_TEST(__type__, __order__, __geometry__, __sep__) \ +int test_image_copy_##__type__##__sep__##__order__##__sep__##__geometry__() { \ + hsa_ext_image_format_t image_format; \ + hsa_ext_image_geometry_t image_geometry; \ + image_format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_##__type__; \ + image_format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_##__order__; \ + image_geometry = HSA_EXT_IMAGE_GEOMETRY_##__geometry__; \ + printf("\nTesting copy of , geometry = %s\n", #__type__, #__order__, #__geometry__); \ + test_image_copy(&image_format, image_geometry); \ + return 0; \ +} + +extern int test_image_copy(hsa_ext_image_format_t* image_format, + hsa_ext_image_geometry_t image_geometry); + +IMAGE_COPY_TEST(SNORM_INT8, A, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, A, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, A, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, A, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, A, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, A, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, R, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, R, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, R, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, R, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, R, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, R, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, RX, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, RX, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, RX, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, RX, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RX, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RX, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, RG, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, RG, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, RG, 3D, _); 
+IMAGE_COPY_TEST(SNORM_INT8, RG, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RG, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RG, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, RGX, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, RGX, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, RGX, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, RGX, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RGX, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RGX, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, RA, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, RA, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, RA, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, RA, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RA, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RA, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, RGBA, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, RGBA, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, RGBA, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, RGBA, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RGBA, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, RGBA, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, BGRA, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, BGRA, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, BGRA, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, BGRA, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, BGRA, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, BGRA, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, ARGB, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, ARGB, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, ARGB, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, ARGB, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, ARGB, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, ARGB, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, ABGR, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, ABGR, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, ABGR, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, ABGR, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, ABGR, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, ABGR, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 1D, _); +IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 2D, _); +IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 3D, _); +IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT8, INTENSITY, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT8, 
LUMINANCE, 2DDEPTH, _); +IMAGE_COPY_TEST(SNORM_INT8, LUMINANCE, 2DADEPTH, _); +IMAGE_COPY_TEST(SNORM_INT16, A, 1D, _); +IMAGE_COPY_TEST(SNORM_INT16, A, 2D, _); +IMAGE_COPY_TEST(SNORM_INT16, A, 3D, _); +IMAGE_COPY_TEST(SNORM_INT16, A, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT16, A, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT16, A, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT16, R, 1D, _); +IMAGE_COPY_TEST(SNORM_INT16, R, 2D, _); +IMAGE_COPY_TEST(SNORM_INT16, R, 3D, _); +IMAGE_COPY_TEST(SNORM_INT16, R, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT16, R, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT16, R, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT16, RX, 1D, _); +IMAGE_COPY_TEST(SNORM_INT16, RX, 2D, _); +IMAGE_COPY_TEST(SNORM_INT16, RX, 3D, _); +IMAGE_COPY_TEST(SNORM_INT16, RX, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RX, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RX, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT16, RG, 1D, _); +IMAGE_COPY_TEST(SNORM_INT16, RG, 2D, _); +IMAGE_COPY_TEST(SNORM_INT16, RG, 3D, _); +IMAGE_COPY_TEST(SNORM_INT16, RG, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RG, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RG, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT16, RGX, 1D, _); +IMAGE_COPY_TEST(SNORM_INT16, RGX, 2D, _); +IMAGE_COPY_TEST(SNORM_INT16, RGX, 3D, _); +IMAGE_COPY_TEST(SNORM_INT16, RGX, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RGX, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RGX, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT16, RA, 1D, _); +IMAGE_COPY_TEST(SNORM_INT16, RA, 2D, _); +IMAGE_COPY_TEST(SNORM_INT16, RA, 3D, _); +IMAGE_COPY_TEST(SNORM_INT16, RA, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RA, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RA, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT16, RGBA, 1D, _); +IMAGE_COPY_TEST(SNORM_INT16, RGBA, 2D, _); +IMAGE_COPY_TEST(SNORM_INT16, RGBA, 3D, _); +IMAGE_COPY_TEST(SNORM_INT16, RGBA, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RGBA, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT16, RGBA, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 1D, _); +IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 2D, _); +IMAGE_COPY_TEST(SNORM_INT16, 
INTENSITY, 3D, _); +IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 1DA, _); +IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 2DA, _); +IMAGE_COPY_TEST(SNORM_INT16, INTENSITY, 1DB, _); +IMAGE_COPY_TEST(SNORM_INT16, LUMINANCE, 2DDEPTH, _); +IMAGE_COPY_TEST(SNORM_INT16, LUMINANCE, 2DADEPTH, _); +IMAGE_COPY_TEST(UNORM_INT8, A, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, A, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, A, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, A, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, A, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, A, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, R, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, R, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, R, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, R, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, R, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, R, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, RX, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, RX, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, RX, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, RX, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, RX, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, RX, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, RG, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, RG, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, RG, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, RG, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, RG, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, RG, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, RGX, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, RGX, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, RGX, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, RGX, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, RGX, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, RGX, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, RA, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, RA, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, RA, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, RA, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, RA, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, RA, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, RGBA, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, RGBA, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, RGBA, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, RGBA, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, RGBA, 2DA, _); 
+IMAGE_COPY_TEST(UNORM_INT8, RGBA, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, BGRA, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, BGRA, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, BGRA, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, BGRA, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, BGRA, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, BGRA, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, ARGB, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, ARGB, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, ARGB, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, ARGB, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, ARGB, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, ARGB, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, ABGR, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, ABGR, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, ABGR, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, ABGR, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, ABGR, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, ABGR, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGB, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGB, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGB, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGB, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGB, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGB, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBX, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, SRGBA, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 2D, _); +IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, SBGRA, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 1D, _); +IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 2D, _); 
+IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 3D, _); +IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT8, INTENSITY, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT8, LUMINANCE, 2DDEPTH, _); +IMAGE_COPY_TEST(UNORM_INT8, LUMINANCE, 2DADEPTH, _); +IMAGE_COPY_TEST(UNORM_INT16, A, 1D, _); +IMAGE_COPY_TEST(UNORM_INT16, A, 2D, _); +IMAGE_COPY_TEST(UNORM_INT16, A, 3D, _); +IMAGE_COPY_TEST(UNORM_INT16, A, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT16, A, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT16, A, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT16, R, 1D, _); +IMAGE_COPY_TEST(UNORM_INT16, R, 2D, _); +IMAGE_COPY_TEST(UNORM_INT16, R, 3D, _); +IMAGE_COPY_TEST(UNORM_INT16, R, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT16, R, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT16, R, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT16, RX, 1D, _); +IMAGE_COPY_TEST(UNORM_INT16, RX, 2D, _); +IMAGE_COPY_TEST(UNORM_INT16, RX, 3D, _); +IMAGE_COPY_TEST(UNORM_INT16, RX, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT16, RX, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT16, RX, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT16, RG, 1D, _); +IMAGE_COPY_TEST(UNORM_INT16, RG, 2D, _); +IMAGE_COPY_TEST(UNORM_INT16, RG, 3D, _); +IMAGE_COPY_TEST(UNORM_INT16, RG, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT16, RG, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT16, RG, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT16, RGX, 1D, _); +IMAGE_COPY_TEST(UNORM_INT16, RGX, 2D, _); +IMAGE_COPY_TEST(UNORM_INT16, RGX, 3D, _); +IMAGE_COPY_TEST(UNORM_INT16, RGX, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT16, RGX, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT16, RGX, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT16, RA, 1D, _); +IMAGE_COPY_TEST(UNORM_INT16, RA, 2D, _); +IMAGE_COPY_TEST(UNORM_INT16, RA, 3D, _); +IMAGE_COPY_TEST(UNORM_INT16, RA, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT16, RA, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT16, RA, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT16, RGBA, 1D, _); +IMAGE_COPY_TEST(UNORM_INT16, RGBA, 2D, _); +IMAGE_COPY_TEST(UNORM_INT16, RGBA, 3D, _); +IMAGE_COPY_TEST(UNORM_INT16, RGBA, 1DA, 
_); +IMAGE_COPY_TEST(UNORM_INT16, RGBA, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT16, RGBA, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 1D, _); +IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 2D, _); +IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 3D, _); +IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 1DA, _); +IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 2DA, _); +IMAGE_COPY_TEST(UNORM_INT16, INTENSITY, 1DB, _); +IMAGE_COPY_TEST(UNORM_INT16, LUMINANCE, 2DDEPTH, _); +IMAGE_COPY_TEST(UNORM_INT16, LUMINANCE, 2DADEPTH, _); +IMAGE_COPY_TEST(UNORM_INT16, DEPTH, 2DDEPTH, _); +IMAGE_COPY_TEST(UNORM_INT16, DEPTH, 2DADEPTH, _); +IMAGE_COPY_TEST(UNORM_INT24, DEPTH, 2DDEPTH, _); +IMAGE_COPY_TEST(UNORM_INT24, DEPTH, 2DADEPTH, _); +IMAGE_COPY_TEST(UNORM_INT24, DEPTH_STENCIL, 2DDEPTH, _); +IMAGE_COPY_TEST(UNORM_INT24, DEPTH_STENCIL, 2DADEPTH, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 1D, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 2D, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 3D, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 1DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 2DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGB, 1DB, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 1D, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 2D, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 3D, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 1DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 2DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_555, RGBX, 1DB, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 1D, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 2D, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 3D, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 1DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 2DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGB, 1DB, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 1D, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 2D, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 3D, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 1DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 2DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_565, RGBX, 1DB, _); 
+IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 1D, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 2D, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 3D, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 1DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 2DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGB, 1DB, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 1D, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 2D, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 3D, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 1DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 2DA, _); +IMAGE_COPY_TEST(UNORM_SHORT_101010, RGBX, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, A, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, A, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, A, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, A, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, A, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, A, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, R, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, R, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, R, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, R, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, R, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, R, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, RX, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RX, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RX, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RX, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RX, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RX, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, RG, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RG, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RG, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RG, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RG, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RG, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGX, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGX, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGX, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGX, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGX, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGX, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, RA, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RA, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RA, 
3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RA, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RA, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RA, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, RGBA, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, BGRA, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, ARGB, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT8, ABGR, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT16, A, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT16, A, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT16, A, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT16, A, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, A, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, A, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT16, R, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT16, R, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT16, R, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT16, R, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, R, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, R, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT16, RX, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RX, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RX, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RX, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RX, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RX, 1DB, _); 
+IMAGE_COPY_TEST(SIGNED_INT16, RG, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RG, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RG, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RG, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RG, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RG, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGX, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGX, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGX, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGX, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGX, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGX, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT16, RA, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RA, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RA, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RA, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RA, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RA, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT16, RGBA, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT32, A, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT32, A, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT32, A, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT32, A, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, A, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, A, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT32, R, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT32, R, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT32, R, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT32, R, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, R, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, R, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT32, RX, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RX, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RX, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RX, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RX, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RX, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT32, RG, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RG, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RG, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RG, 
1DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RG, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RG, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGX, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGX, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGX, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGX, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGX, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGX, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT32, RA, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RA, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RA, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RA, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RA, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RA, 1DB, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 1D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 2D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 3D, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 1DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 2DA, _); +IMAGE_COPY_TEST(SIGNED_INT32, RGBA, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, A, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, A, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, A, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, A, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, A, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, A, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, R, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, R, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, R, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, R, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, R, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, R, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RX, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RG, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 1D, _); 
+IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGX, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RA, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, RGBA, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, BGRA, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ARGB, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT8, ABGR, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, A, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, A, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, A, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, A, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, A, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, A, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, R, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, R, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, 
R, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, R, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, R, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, R, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RX, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RG, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGX, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RA, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT16, RGBA, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, A, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, A, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, A, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, A, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, A, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, A, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, R, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, R, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, R, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, R, 1DA, _); 
+IMAGE_COPY_TEST(UNSIGNED_INT32, R, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, R, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RX, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RG, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGX, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RA, 1DB, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 1D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 2D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 3D, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 1DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 2DA, _); +IMAGE_COPY_TEST(UNSIGNED_INT32, RGBA, 1DB, _); +IMAGE_COPY_TEST(HALF_FLOAT, A, 1D, _); +IMAGE_COPY_TEST(HALF_FLOAT, A, 2D, _); +IMAGE_COPY_TEST(HALF_FLOAT, A, 3D, _); +IMAGE_COPY_TEST(HALF_FLOAT, A, 1DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, A, 2DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, A, 1DB, _); +IMAGE_COPY_TEST(HALF_FLOAT, R, 1D, _); +IMAGE_COPY_TEST(HALF_FLOAT, R, 2D, _); +IMAGE_COPY_TEST(HALF_FLOAT, R, 3D, _); +IMAGE_COPY_TEST(HALF_FLOAT, R, 1DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, R, 2DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, R, 1DB, _); +IMAGE_COPY_TEST(HALF_FLOAT, RX, 1D, _); 
+IMAGE_COPY_TEST(HALF_FLOAT, RX, 2D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RX, 3D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RX, 1DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RX, 2DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RX, 1DB, _); +IMAGE_COPY_TEST(HALF_FLOAT, RG, 1D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RG, 2D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RG, 3D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RG, 1DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RG, 2DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RG, 1DB, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGX, 1D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGX, 2D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGX, 3D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGX, 1DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGX, 2DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGX, 1DB, _); +IMAGE_COPY_TEST(HALF_FLOAT, RA, 1D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RA, 2D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RA, 3D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RA, 1DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RA, 2DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RA, 1DB, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 1D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 2D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 3D, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 1DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 2DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, RGBA, 1DB, _); +IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 1D, _); +IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 2D, _); +IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 3D, _); +IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 1DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 2DA, _); +IMAGE_COPY_TEST(HALF_FLOAT, INTENSITY, 1DB, _); +IMAGE_COPY_TEST(HALF_FLOAT, LUMINANCE, 2DDEPTH, _); +IMAGE_COPY_TEST(HALF_FLOAT, LUMINANCE, 2DADEPTH, _); +IMAGE_COPY_TEST(FLOAT, A, 1D, _); +IMAGE_COPY_TEST(FLOAT, A, 2D, _); +IMAGE_COPY_TEST(FLOAT, A, 3D, _); +IMAGE_COPY_TEST(FLOAT, A, 1DA, _); +IMAGE_COPY_TEST(FLOAT, A, 2DA, _); +IMAGE_COPY_TEST(FLOAT, A, 1DB, _); +IMAGE_COPY_TEST(FLOAT, R, 1D, _); +IMAGE_COPY_TEST(FLOAT, R, 2D, _); +IMAGE_COPY_TEST(FLOAT, R, 3D, _); +IMAGE_COPY_TEST(FLOAT, R, 1DA, _); +IMAGE_COPY_TEST(FLOAT, 
R, 2DA, _); +IMAGE_COPY_TEST(FLOAT, R, 1DB, _); +IMAGE_COPY_TEST(FLOAT, RX, 1D, _); +IMAGE_COPY_TEST(FLOAT, RX, 2D, _); +IMAGE_COPY_TEST(FLOAT, RX, 3D, _); +IMAGE_COPY_TEST(FLOAT, RX, 1DA, _); +IMAGE_COPY_TEST(FLOAT, RX, 2DA, _); +IMAGE_COPY_TEST(FLOAT, RX, 1DB, _); +IMAGE_COPY_TEST(FLOAT, RG, 1D, _); +IMAGE_COPY_TEST(FLOAT, RG, 2D, _); +IMAGE_COPY_TEST(FLOAT, RG, 3D, _); +IMAGE_COPY_TEST(FLOAT, RG, 1DA, _); +IMAGE_COPY_TEST(FLOAT, RG, 2DA, _); +IMAGE_COPY_TEST(FLOAT, RG, 1DB, _); +IMAGE_COPY_TEST(FLOAT, RGX, 1D, _); +IMAGE_COPY_TEST(FLOAT, RGX, 2D, _); +IMAGE_COPY_TEST(FLOAT, RGX, 3D, _); +IMAGE_COPY_TEST(FLOAT, RGX, 1DA, _); +IMAGE_COPY_TEST(FLOAT, RGX, 2DA, _); +IMAGE_COPY_TEST(FLOAT, RGX, 1DB, _); +IMAGE_COPY_TEST(FLOAT, RA, 1D, _); +IMAGE_COPY_TEST(FLOAT, RA, 2D, _); +IMAGE_COPY_TEST(FLOAT, RA, 3D, _); +IMAGE_COPY_TEST(FLOAT, RA, 1DA, _); +IMAGE_COPY_TEST(FLOAT, RA, 2DA, _); +IMAGE_COPY_TEST(FLOAT, RA, 1DB, _); +IMAGE_COPY_TEST(FLOAT, RGBA, 1D, _); +IMAGE_COPY_TEST(FLOAT, RGBA, 2D, _); +IMAGE_COPY_TEST(FLOAT, RGBA, 3D, _); +IMAGE_COPY_TEST(FLOAT, RGBA, 1DA, _); +IMAGE_COPY_TEST(FLOAT, RGBA, 2DA, _); +IMAGE_COPY_TEST(FLOAT, RGBA, 1DB, _); +IMAGE_COPY_TEST(FLOAT, INTENSITY, 1D, _); +IMAGE_COPY_TEST(FLOAT, INTENSITY, 2D, _); +IMAGE_COPY_TEST(FLOAT, INTENSITY, 3D, _); +IMAGE_COPY_TEST(FLOAT, INTENSITY, 1DA, _); +IMAGE_COPY_TEST(FLOAT, INTENSITY, 2DA, _); +IMAGE_COPY_TEST(FLOAT, INTENSITY, 1DB, _); +IMAGE_COPY_TEST(FLOAT, LUMINANCE, 2DDEPTH, _); +IMAGE_COPY_TEST(FLOAT, LUMINANCE, 2DADEPTH, _); +IMAGE_COPY_TEST(FLOAT, DEPTH, 2DDEPTH, _); +IMAGE_COPY_TEST(FLOAT, DEPTH, 2DADEPTH, _); +IMAGE_COPY_TEST(FLOAT, DEPTH_STENCIL, 2DDEPTH, _); +IMAGE_COPY_TEST(FLOAT, DEPTH_STENCIL, 2DADEPTH, _); + +#endif // _HSA_IMAGE_COPY_H_ diff --git a/src/extensions/images/copy/test_image_copy.c b/src/extensions/images/copy/test_image_copy.c new file mode 100644 index 0000000..d074070 --- /dev/null +++ b/src/extensions/images/copy/test_image_copy.c @@ -0,0 +1,572 @@ +/* + * 
============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+/*
+ * Test Name: image_copy_<channel_type>_<channel_order>_<geometry>
+ *
+ * Purpose: Verifies that if an image with the format and geometry specified
+ * by the channel type, channel order and image geometry is supported on an agent
+ * it can be successfully copied from one image to another using the hsa_ext_image_copy API.
+ *
+ * Test Description:
+ * 1) Check each agent on the platform and determine if they support an image with
+ *    channel type = <channel_type>, channel order = <channel_order> and geometry = <geometry>.
+ *    Use the hsa_ext_image_get_capability to do this.
+ *
+ * 2) If the agent supports the target format and geometry, query the agent using
+ *    hsa_ext_image_data_get_info to determine the maximum size of the image on the agent.
+ *
+ * 3) Use hsa_ext_image_get_info to determine the size and alignment required for the image's
+ *    backing buffer. The image permissions should be read/write and the dimensions should
+ *    be the maximum queried from the previous step.
+ *
+ * 4) Allocate the backing buffer with hsa_memory_allocate from an appropriate memory region
+ *    associated with the agent. Do this for both a source and destination image.
+ *
+ * 5) Create both source and destination images on the agent using the backing buffer
+ *    allocated in the previous step.
+ *
+ * 6) Use the hsa_ext_image_clear API to clear both images with two different data patterns.
+ *
+ * 7) Use the hsa_ext_image_copy API to copy a region from the source image to the destination
+ *    image.
+ *
+ * 8) Use the verify image kernel to verify the region was properly copied.
+ *
+ * 9) Repeat steps 6 to 8 for all sub-regions of the images.
+ *
+ * Expected results: The regions specified by the hsa_ext_image_copy API are the only
+ * ones that should be affected.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Runs the image copy test for one image format / geometry combination.
+ *
+ * For every agent that supports kernel dispatch and the requested image
+ * format, a source and a destination image are created, a sub-region is
+ * copied from the source to the destination with hsa_ext_image_copy, and the
+ * validation kernel is dispatched to check the destination image. This is
+ * repeated for every sub-region of the image.
+ *
+ * image_format   - channel type and channel order of the images under test.
+ * image_geometry - geometry of the images under test.
+ *
+ * Returns 0 when the test ran to completion. Failures are reported through
+ * the ASSERT macro.
+ */
+int test_image_copy(hsa_ext_image_format_t* image_format,
+                    hsa_ext_image_geometry_t image_geometry) {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Get the images function pointer table
+    hsa_ext_image_pfn_t pfn;
+    status = get_image_fnc_tbl(&pfn);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("verify_image_region.brig", &module));
+
+    // Get the list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Structures for querying format capabilities.
+    uint32_t capability_mask;
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Only agents that can execute kernels are able to run the
+        // validation kernel.
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (!(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Get the destination image's format capability mask.
+        status = pfn.hsa_ext_image_get_capability(agent_list.agents[ii],
+                                                  image_geometry,
+                                                  image_format,
+                                                  &capability_mask);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        if (HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED == capability_mask) {
+            printf("The destination image format is not supported.\n");
+            continue;
+        }
+
+        if (!(HSA_EXT_IMAGE_CAPABILITY_READ_ONLY & capability_mask) &&
+            !(HSA_EXT_IMAGE_CAPABILITY_READ_WRITE & capability_mask) &&
+            !(HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE & capability_mask)) {
+            printf("The destination image format cannot be tested.\n");
+            continue;
+        }
+
+        // Query the dispatch limits of the agent. Each result is stored in
+        // status so that the assertion checks the query that was just made.
+        uint32_t grid_max_size;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_GRID_MAX_SIZE, &grid_max_size);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        uint32_t grid_max_dim[3];
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        uint32_t work_group_max_size;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &work_group_max_size);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        uint32_t work_group_max_dim[3];
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_WORKGROUP_MAX_DIM, &work_group_max_dim);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Find a global memory region for the image backing buffers.
+        // Success is judged by the handle the callback stores, not by the
+        // status returned from the iteration.
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t) -1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        ASSERT(global_region.handle != (uint64_t) -1);
+
+        // Find a memory region that supports kernel arguments
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t) -1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t) -1 != kernarg_region.handle);
+
+        // Define the region step array
+        uint32_t region_step[3];
+
+        // Get information regarding the images on this agent using the specified
+        // geometry.
+        int image_dimension = 0;
+        uint32_t max_elements[3];
+        char* validation_kernel[1];
+
+        get_geometry_info(agent_list.agents[ii],
+                          image_format,
+                          image_geometry,
+                          &image_dimension,
+                          max_elements,
+                          &validation_kernel[0]);
+
+        // Adjust max_elements values: first to the grid limits of the agent,
+        // then to a fixed upper bound of 1024 x 1024 x 8 elements.
+        max_elements[0] = (max_elements[0] < grid_max_dim[0]) ? max_elements[0] : grid_max_dim[0];
+        max_elements[1] = (max_elements[1] < grid_max_dim[1]) ? max_elements[1] : grid_max_dim[1];
+        max_elements[2] = (max_elements[2] < grid_max_dim[2]) ? max_elements[2] : grid_max_dim[2];
+        max_elements[0] = (max_elements[0] < 1024) ? max_elements[0] : 1024;
+        max_elements[1] = (max_elements[1] < 1024) ? max_elements[1] : 1024;
+        max_elements[2] = (max_elements[2] < 8) ? max_elements[2] : 8;
+        ASSERT((max_elements[0] * max_elements[1] * max_elements[2]) < grid_max_size);
+
+        // Adjust region step size values
+        region_step[0] = region_step[1] = 256;
+        region_step[2] = 1;
+        region_step[0] = (region_step[0] < max_elements[0]) ? region_step[0] : max_elements[0];
+        region_step[1] = (region_step[1] < max_elements[1]) ? region_step[1] : max_elements[1];
+        region_step[2] = (region_step[2] < max_elements[2]) ? region_step[2] : max_elements[2];
+
+        // Adjust the work_group_max_dim sizes
+        work_group_max_dim[0] = (work_group_max_dim[0] < 16) ? work_group_max_dim[0] : 16;
+        work_group_max_dim[1] = (work_group_max_dim[1] < 16) ? work_group_max_dim[1] : 16;
+        work_group_max_dim[2] = (work_group_max_dim[2] < 1) ? work_group_max_dim[2] : 1;
+        work_group_max_dim[0] = (work_group_max_dim[0] < max_elements[0]) ? work_group_max_dim[0] : max_elements[0];
+        work_group_max_dim[1] = (work_group_max_dim[1] < max_elements[1]) ? work_group_max_dim[1] : max_elements[1];
+        work_group_max_dim[2] = (work_group_max_dim[2] < max_elements[2]) ? work_group_max_dim[2] : max_elements[2];
+        ASSERT((work_group_max_dim[0] * work_group_max_dim[1] * work_group_max_dim[2]) <= work_group_max_size);
+
+        // Create a queue to execute validation kernels.
+        hsa_queue_t *queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info for the validation kernel
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, &validation_kernel[0], &symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Define the validation kernel arguments
+        typedef struct __attribute__ ((aligned(16))) validate_args_s {
+            hsa_ext_image_t image;   // The image handle
+            void* rgn_values;        // The pixel pattern expected inside the specified region
+            void* bkg_values;        // The pixel pattern expected in the rest of the image
+            uint32_t* start_region;  // The region's starting coords
+            uint32_t* end_region;    // The region's ending coords
+            uint32_t* bits;          // The value of get_channel_type_bits for the channel type
+            uint32_t* cmp_mask;      // The value of get_cmp_info for the channel order
+            uint32_t* error;         // An error field representing different rgba channel errors
+        } validate_args_t;
+
+        // Allocate the kernel argument buffer from the correct region
+        validate_args_t* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region,
+                                     symbol_record.kernarg_segment_size,
+                                     (void**)(&kernarg_buffer));
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the pattern buffers for the image. The element type of the
+        // pattern follows the channel type: unsigned integer, signed integer
+        // or float.
+        void* bg_pattern;
+        void* clr_pattern;
+
+        if (image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 ||
+            image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 ||
+            image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32) {
+            status = hsa_memory_allocate(global_region, 4 * sizeof(uint32_t), (void**) &bg_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            uint32_t *bg = (uint32_t*) bg_pattern;
+            bg[0] = bg[1] = bg[2] = bg[3] = 0;
+            status = hsa_memory_allocate(global_region, 4 * sizeof(uint32_t), (void**) &clr_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            uint32_t *clr = (uint32_t*) clr_pattern;
+            clr[0] = clr[1] = clr[2] = clr[3] = 255;
+        } else if (image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 ||
+                   image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 ||
+                   image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32) {
+            status = hsa_memory_allocate(global_region, 4 * sizeof(int32_t), (void**) &bg_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            int32_t *bg = (int32_t*) bg_pattern;
+            bg[0] = bg[1] = bg[2] = bg[3] = 0;
+            status = hsa_memory_allocate(global_region, 4 * sizeof(int32_t), (void**) &clr_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            int32_t *clr = (int32_t*) clr_pattern;
+            clr[0] = clr[1] = clr[2] = clr[3] = 127;
+        } else {
+            status = hsa_memory_allocate(global_region, 4 * sizeof(float), (void**) &bg_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            float *bg = (float*) bg_pattern;
+            bg[0] = bg[1] = bg[2] = bg[3] = 0.0f;
+            status = hsa_memory_allocate(global_region, 4 * sizeof(float), (void**) &clr_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            float *clr = (float*) clr_pattern;
+            clr[0] = clr[1] = clr[2] = clr[3] = 0.5f;
+        }
+
+        // Create the start and end region buffers
+        uint32_t* start_region;
+        uint32_t* end_region;
+        status = hsa_memory_allocate(global_region, 3 * sizeof(uint32_t), (void**) &start_region);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, 3 * sizeof(uint32_t), (void**) &end_region);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the error, mask and bits buffers
+        uint32_t* error;
+        uint32_t* cmp_mask;
+        uint32_t* bits;
+        status = hsa_memory_allocate(global_region, sizeof(uint32_t), (void**) &error);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, sizeof(uint32_t), (void**) &cmp_mask);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, sizeof(uint32_t), (void**) &bits);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Add all of the components to the argument buffer
+        *cmp_mask = get_cmp_info(image_format->channel_order);
+        *bits = get_channel_type_bits(image_format->channel_type);
+        kernarg_buffer->rgn_values = clr_pattern;
+        kernarg_buffer->bkg_values = bg_pattern;
+        kernarg_buffer->start_region = start_region;
+        kernarg_buffer->end_region = end_region;
+        kernarg_buffer->bits = bits;
+        kernarg_buffer->cmp_mask = cmp_mask;
+        kernarg_buffer->error = error;
+
+        // Determine the size and alignment for the image backing buffers.
+        // The same descriptor is used for the source and destination images.
+        hsa_ext_image_descriptor_t image_descriptor;
+        image_descriptor.geometry = image_geometry;
+        image_descriptor.width = max_elements[0];
+        image_descriptor.height = max_elements[1];
+        image_descriptor.depth = max_elements[2];
+        image_descriptor.array_size = 1;
+        image_descriptor.format.channel_type = image_format->channel_type;
+        image_descriptor.format.channel_order = image_format->channel_order;
+
+        hsa_ext_image_data_info_t image_info;
+        hsa_access_permission_t access_permissions = HSA_ACCESS_PERMISSION_RW;
+
+        // NOTE(review): this is called directly while the other image APIs
+        // go through the pfn table - confirm that this is intended.
+        status = hsa_ext_image_data_get_info(agent_list.agents[ii],
+                                             &image_descriptor,
+                                             access_permissions,
+                                             &image_info);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Verify that the memory region will correctly align the
+        // image data.
+        size_t region_align;
+        status = hsa_region_get_info(global_region, HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT, &region_align);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        ASSERT((region_align >= image_info.alignment) && (region_align % image_info.alignment == 0));
+
+        // Allocate the backing buffer for the source image
+        hsa_ext_image_t src_image;
+        void* src_image_data;
+        status = hsa_memory_allocate(global_region, image_info.size, (void**) &src_image_data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the source image with the backing buffer.
+        status = pfn.hsa_ext_image_create(agent_list.agents[ii],
+                                          &image_descriptor,
+                                          src_image_data,
+                                          access_permissions,
+                                          &src_image);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the backing buffer for the destination image
+        hsa_ext_image_t dst_image;
+        void* dst_image_data;
+        status = hsa_memory_allocate(global_region, image_info.size, (void**) &dst_image_data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the destination image with the backing buffer.
+        status = pfn.hsa_ext_image_create(agent_list.agents[ii],
+                                          &image_descriptor,
+                                          dst_image_data,
+                                          access_permissions,
+                                          &dst_image);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create a completion signal
+        hsa_signal_t completion_signal;
+        status = hsa_signal_create(1, 0, NULL, &completion_signal);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Setup the dispatch packet
+        hsa_kernel_dispatch_packet_t dispatch_packet;
+        memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));
+        dispatch_packet.setup |= image_dimension << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+        dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+        dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+        dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+        dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER;
+        dispatch_packet.kernel_object = symbol_record.kernel_object;
+        dispatch_packet.group_segment_size = symbol_record.group_segment_size;
+        dispatch_packet.private_segment_size = symbol_record.private_segment_size;
+        dispatch_packet.kernarg_address = (void*) kernarg_buffer;
+        dispatch_packet.completion_signal = completion_signal;
+        dispatch_packet.workgroup_size_x = work_group_max_dim[0];
+        dispatch_packet.workgroup_size_y = work_group_max_dim[1];
+        dispatch_packet.workgroup_size_z = work_group_max_dim[2];
+        dispatch_packet.grid_size_x = max_elements[0];
+        dispatch_packet.grid_size_y = max_elements[1];
+        dispatch_packet.grid_size_z = max_elements[2];
+
+        // Define the regions
+        hsa_ext_image_region_t region_all;
+        hsa_ext_image_region_t region_partial;
+        region_all.offset.x = 0;
+        region_all.offset.y = 0;
+        region_all.offset.z = 0;
+        region_all.range.x = max_elements[0];
+        region_all.range.y = max_elements[1];
+        region_all.range.z = max_elements[2];
+
+        size_t x_offset, y_offset, z_offset;
+
+        // Copy every sub-region of the image, checking for validity each
+        // iteration. Along each axis the range is the step size, except for
+        // the last step which covers whatever remains. Each loop condition
+        // guarantees that at least one element remains, so a range is never
+        // zero.
+        for (x_offset = 0; x_offset < max_elements[0]; x_offset += region_step[0]) {
+            size_t x_remaining = max_elements[0] - x_offset;
+            region_partial.offset.x = x_offset;
+            region_partial.range.x = (region_step[0] <= x_remaining) ? region_step[0] : (uint32_t) x_remaining;
+            start_region[0] = region_partial.offset.x;
+            end_region[0] = start_region[0] + region_partial.range.x;
+
+            for (y_offset = 0; y_offset < max_elements[1]; y_offset += region_step[1]) {
+                size_t y_remaining = max_elements[1] - y_offset;
+                region_partial.offset.y = y_offset;
+                region_partial.range.y = (region_step[1] <= y_remaining) ? region_step[1] : (uint32_t) y_remaining;
+                start_region[1] = region_partial.offset.y;
+                end_region[1] = start_region[1] + region_partial.range.y;
+
+                for (z_offset = 0; z_offset < max_elements[2]; z_offset += region_step[2]) {
+                    // The z range is computed from the z offset and the z
+                    // extent of the image only.
+                    size_t z_remaining = max_elements[2] - z_offset;
+                    region_partial.offset.z = z_offset;
+                    region_partial.range.z = (region_step[2] <= z_remaining) ? region_step[2] : (uint32_t) z_remaining;
+                    start_region[2] = region_partial.offset.z;
+                    end_region[2] = start_region[2] + region_partial.range.z;
+
+                    // Clear the entire source image to the clr_pattern.
+                    status = pfn.hsa_ext_image_clear(agent_list.agents[ii],
+                                                     src_image,
+                                                     clr_pattern,
+                                                     &region_all);
+
+                    ASSERT(HSA_STATUS_SUCCESS == status);
+
+                    // Clear the entire destination image to the bg_pattern.
+                    status = pfn.hsa_ext_image_clear(agent_list.agents[ii],
+                                                     dst_image,
+                                                     bg_pattern,
+                                                     &region_all);
+
+                    ASSERT(HSA_STATUS_SUCCESS == status);
+
+                    // Copy the partial region of the source image to the destination image
+                    status = pfn.hsa_ext_image_copy(agent_list.agents[ii],
+                                                    src_image,
+                                                    &region_partial.offset,
+                                                    dst_image,
+                                                    &region_partial.offset,
+                                                    &region_partial.range);
+
+                    ASSERT(HSA_STATUS_SUCCESS == status);
+
+                    kernarg_buffer->image = dst_image;
+
+                    *kernarg_buffer->error = 0;
+
+                    // Dispatch the kernel
+                    enqueue_dispatch_packet(queue, &dispatch_packet);
+
+                    // Wait on the completion signal
+                    hsa_signal_value_t value;
+                    do {
+                        value = hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+                    } while (0 != value);
+
+                    // Verify that no errors occurred. The region is printed
+                    // before asserting so that a failure can be located.
+                    if (0 != *kernarg_buffer->error) {
+                        printf("\nRegion: (%u,%u,%u) -> (%u,%u,%u) Error: %u\n",
+                               start_region[0],
+                               start_region[1],
+                               start_region[2],
+                               end_region[0],
+                               end_region[1],
+                               end_region[2],
+                               *kernarg_buffer->error);
+                        ASSERT(0 == *kernarg_buffer->error);
+                    }
+
+                    printf(".");
+
+                    // Reset the signal value
+                    hsa_signal_store_release(completion_signal, 1);
+                }
+            }
+        }
+
+        // Destroy the completion signal
+        status = hsa_signal_destroy(completion_signal);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the images
+        status = pfn.hsa_ext_image_destroy(agent_list.agents[ii], src_image);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = pfn.hsa_ext_image_destroy(agent_list.agents[ii], dst_image);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the backing buffers
+        status = hsa_memory_free(src_image_data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(dst_image_data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the kernarg_buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the clear and background patterns
+        status = hsa_memory_free(bg_pattern);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(clr_pattern);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free all the utility buffers
+        status = hsa_memory_free(start_region);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(end_region);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(error);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(cmp_mask);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(bits);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Shutdown HSA
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/extensions/images/import_export/hsa_image_import_export.c b/src/extensions/images/import_export/hsa_image_import_export.c
new file mode 100644
index 0000000..18a873e
--- /dev/null
+++ b/src/extensions/images/import_export/hsa_image_import_export.c
@@ -0,0 +1,5174 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include "hsa_image_import_export.h" + +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,3D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,1DB, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,1DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,1DB, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,3D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,3D, _); 
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument changes here from UNORM_INT16 to UNORM_INT24. Second argument: LUMINANCE (1DA onward), DEPTH, DEPTH_STENCIL for UNORM_INT16, then A and R (through 3D) for UNORM_INT24. Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,3D, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_INT24. Second argument: R (1DA onward), RX, RG, RGX, RA (through 2DA). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2DA, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_INT24. Second argument: RA (1DB onward), RGB, RGBX, RGBA, BGRA (through 1DB). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,1DB, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_INT24. Second argument: BGRA (2DDEPTH onward), ARGB, ABGR, SRGB, SRGBX (through 2DDEPTH). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2DDEPTH, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument changes here from UNORM_INT24 to UNORM_SHORT_555. Second argument: SRGBX (2DADEPTH only), SRGBA, SBGRA, INTENSITY, LUMINANCE, DEPTH, DEPTH_STENCIL for UNORM_INT24, then A and R (through 1DB) for UNORM_SHORT_555. Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2DDEPTH, 
_); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,1DB, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_SHORT_555. Second argument: R (2DDEPTH onward), RX, RG, RGX, RA (through 1DB). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,1DB, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_SHORT_555. Second argument: RA (2DDEPTH onward), RGB, RGBX, RGBA, BGRA (through 2DA). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2DA, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_SHORT_555. Second argument: BGRA (1DB onward), ARGB, ABGR, SRGB, SRGBX (through 1DA). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,1DA, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_SHORT_555. Second argument: SRGBX (2DA onward), SRGBA, SBGRA, INTENSITY, LUMINANCE (through 2D). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2D, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument changes here from UNORM_SHORT_555 to UNORM_SHORT_565. Second argument: LUMINANCE (3D onward), DEPTH, DEPTH_STENCIL for UNORM_SHORT_555, then A, R, RX, RG, RGX for UNORM_SHORT_565. Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2DADEPTH, 
_); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2DADEPTH, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_SHORT_565. Second argument: RA, RGB, RGBX, RGBA (through 2DDEPTH). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2DDEPTH, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_SHORT_565. Second argument: RGBA (2DADEPTH only), BGRA, ARGB, ABGR, SRGB (through 1DB). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,1DB, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_SHORT_565. Second argument: SRGB (2DDEPTH onward), SRGBX, SRGBA, SBGRA, INTENSITY (through 1DA). Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,1DA, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument changes here from UNORM_SHORT_565 to UNORM_SHORT_101010. Second argument: INTENSITY (2DA onward), LUMINANCE, DEPTH, DEPTH_STENCIL for UNORM_SHORT_565, then A (1D only) for UNORM_SHORT_101010. Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,1D, _);
/* Invocations of DEFINE_IMAGE_IMPORT_EXPORT_TEST (macro defined outside this excerpt). First argument: UNORM_SHORT_101010. Second argument: A (2D onward), R, RX, RG. Third argument cycles 1D, 2D, 3D, 1DA, 2DA, 1DB, 2DDEPTH, 2DADEPTH; fourth argument is always _. NOTE(review): the arguments look like channel type / channel order / geometry names - confirm against the macro definition. */ +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2DADEPTH, _);
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,1DB, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,1DA, 
_); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,1DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,1DB, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2DADEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2DADEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,1D, 
_); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2DADEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2DADEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2D, 
_); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2DADEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,1DB, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,3D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2D, 
_); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,1D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2D, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,1DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,1D, 
_); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,1DA, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2DDEPTH, _); 
+DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2DADEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,1D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,3D, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,1DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2DA, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,1DB, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2DDEPTH, _); +DEFINE_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2DADEPTH, _); + +int main(int argc, char* argv[]) { + INITIALIZE_TESTSUITE(); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2DDEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2DDEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2DDEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,1DA, _); 
+ ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,1D, _); 
+ ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,1D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2DDEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,1DB, 
_); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,1D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,1D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2DDEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,1D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2D, 
_); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2DDEPTH, 
_); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2DADEPTH, 
_); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2DDEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,1DA, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2DDEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2D, 
_); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,3D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,1D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2DADEPTH, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,1DB, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2D, _); + 
ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2DADEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,1D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,3D, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,1DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2DA, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,1DB, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2DDEPTH, _); + ADD_IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2DADEPTH, _); + + RUN_TESTS(); +} diff --git a/src/extensions/images/import_export/hsa_image_import_export.h b/src/extensions/images/import_export/hsa_image_import_export.h new file mode 100644 index 0000000..8c7751c --- /dev/null +++ b/src/extensions/images/import_export/hsa_image_import_export.h @@ -0,0 +1,2636 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#ifndef HSA_IMAGES_IMPORT_EXPORT_H_ +#define HSA_IMAGES_IMPORT_EXPORT_H_ + +#include +#include +#include +#include + +#define DEFINE_IMAGE_IMPORT_EXPORT_TEST(__type__, __order__, __geometry__, __sep__) \ +DEFINE_TEST(image_import_export_##__type__##__sep__##__order__##__sep__##__geometry__) + +#define ADD_IMAGE_IMPORT_EXPORT_TEST(__type__, __order__, __geometry__, __sep__) \ +ADD_TEST(image_import_export_##__type__##__sep__##__order__##__sep__##__geometry__) + +#define IMAGE_IMPORT_EXPORT_TEST(__type__, __order__, __geometry__, __sep__) \ +int test_image_import_export_##__type__##__sep__##__order__##__sep__##__geometry__() { \ + hsa_ext_image_format_t image_format; \ + hsa_ext_image_geometry_t image_geometry; \ + image_format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_##__type__; \ + image_format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_##__order__; \ + image_geometry = HSA_EXT_IMAGE_GEOMETRY_##__geometry__; \ + printf("\nTesting import-export of , geometry = %s\n", #__type__, #__order__, #__geometry__); \ + test_image_import_export(&image_format, image_geometry); \ + return 0; \ +} + +extern int test_image_import_export(hsa_ext_image_format_t* image_format, + hsa_ext_image_geometry_t image_geometry); + +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2DDEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2DDEPTH, 
_); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2D, _); 
+IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT8,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,1DB, _); 
+IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SRGBA,2DADEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SNORM_INT16,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,1D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2DDEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT8,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RX,2DADEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,1DB, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2DDEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT16,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ARGB,2DADEPTH, 
_); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2DDEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_INT24,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,1DB, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,1D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,3D, 
_); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_555,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,1D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,1DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2DDEPTH, 
_); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH,2DADEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_565,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,1D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,1DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,1DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNORM_SHORT_101010,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2D, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,RGBA,2DADEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,1DB, 
_); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,1D, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT8,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,1DB, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,1DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,1D, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT16,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,A,2DADEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,1DB, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2DDEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2D, _); 
+IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(SIGNED_INT32,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2DDEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,1DA, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGB,2DADEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT8,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,1DB, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2DDEPTH, 
_); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT16,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2DDEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,1DB, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,INTENSITY,2DADEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(UNSIGNED_INT32,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,1D, _); 
+IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2DDEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(HALF_FLOAT,DEPTH_STENCIL,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,A,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,R,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RG,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2DDEPTH, _); 
+IMAGE_IMPORT_EXPORT_TEST(FLOAT,RA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,RGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,BGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ARGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2D, 
_); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,ABGR,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGB,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBX,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SRGBA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,SBGRA,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,3D, _); 
+IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,INTENSITY,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,LUMINANCE,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH,2DADEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,1D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,3D, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,1DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2DA, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,1DB, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2DDEPTH, _); +IMAGE_IMPORT_EXPORT_TEST(FLOAT,DEPTH_STENCIL,2DADEPTH, _); + +#endif diff --git a/src/extensions/images/import_export/test_image_import_export.c b/src/extensions/images/import_export/test_image_import_export.c new file mode 100644 index 0000000..eac8c59 --- /dev/null +++ b/src/extensions/images/import_export/test_image_import_export.c @@ -0,0 +1,568 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +/* + * Test Name: image_import_export_<channel_type>_<channel_order>_<geometry> + * + * Purpose: Verifies that if an image with the format and geometry specified + * by the channel type, channel order and image geometry is supported on an agent + * it can be created and successfully imported and exported using the + * hsa_ext_image_import and hsa_ext_image_export APIs. + * + * Test Description: + * 1) Check each agent on the platform and determine if they support an image with + * channel type = <channel_type>, channel order = <channel_order> and geometry = <geometry>. Use the + * hsa_ext_image_get_capability API to do this. + * 2) If the agent supports the format and geometry, query the agent using hsa_agent_get_info + * to determine the maximum size of an image on the agent. + * 3) Use hsa_ext_image_data_get_info to determine the size and alignment required for the image + * backing buffer. The image permissions should be read/write and the dimensions should + * be the maximum queried from the previous step. + * 4) Allocate the backing buffer with hsa_memory_allocate from an appropriate memory region + * associated with the agent. + * 5) Create an image handle on the agent using the backing buffer allocated in the previous + * step. + * 6) Use the hsa_ext_image_clear API to clear the entire image with a set data pattern. + * 7) Use the hsa_ext_image_export API to export the entire image. Verify that it + * was properly cleared. + * 8) Use the hsa_ext_image_import API to import a portion of the image. + * 9) Use the hsa_ext_image_export API to export that portion of the image. Verify + * that the portion of the data just exported matches the original export data. + * 10) Repeat steps 8 to 9 until the entire image has been imported/exported. + * + * Expected results: The import/export API calls should succeed and the data should remain + * unchanged.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Runs the image import/export round-trip test for one image format and
+ * geometry on every agent that supports kernel dispatch.
+ *
+ * For each such agent whose capability mask allows the format, an image is
+ * created over a freshly allocated backing buffer. The image is then walked
+ * in sub-regions of at most 256 x 256 x 1 elements. For every sub-region the
+ * whole image is cleared to the background pattern, the sub-region is
+ * cleared to the "clear" pattern, the whole image is exported to a buffer,
+ * the image is cleared again, the buffer is imported back, and the
+ * validation kernel selected by get_geometry_info() is dispatched; the test
+ * asserts that the kernel leaves its error word at 0.
+ *
+ * image_format    Channel type and channel order of the image under test.
+ * image_geometry  Geometry of the image under test.
+ *
+ * Returns 0 on completion; any failure trips an ASSERT.
+ */
+int test_image_import_export(hsa_ext_image_format_t* image_format,
+                             hsa_ext_image_geometry_t image_geometry) {
+    hsa_status_t status;
+    status = hsa_init();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Get the images function pointer table
+    hsa_ext_image_pfn_t pfn;
+    status = get_image_fnc_tbl(&pfn);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Load the BRIG module
+    // NOTE(review): neither the module nor the agent list is released in
+    // this function; confirm whether the suite provides helpers for that.
+    hsa_ext_module_t module;
+    ASSERT(0 == load_module_from_file("verify_image_region.brig", &module));
+
+    // Get the list of agents
+    struct agent_list_s agent_list;
+    get_agent_list(&agent_list);
+
+    // Receives the format capability bits for the current agent.
+    uint32_t capability_mask;
+
+    // Repeat the test for each agent
+    int ii;
+    for (ii = 0; ii < agent_list.num_agents; ++ii) {
+        // Only agents that can execute kernels can run the validation kernel.
+        uint32_t features = 0;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_FEATURE, &features);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        if (!(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+            continue;
+        }
+
+        // Get format capability mask.
+        status = pfn.hsa_ext_image_get_capability(agent_list.agents[ii],
+                                                  image_geometry,
+                                                  image_format,
+                                                  &capability_mask);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        if (HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED == capability_mask) {
+            printf("Image format is not supported.\n");
+            continue;
+        }
+
+        if (!(HSA_EXT_IMAGE_CAPABILITY_READ_ONLY & capability_mask) &&
+            !(HSA_EXT_IMAGE_CAPABILITY_READ_WRITE & capability_mask) &&
+            !(HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE & capability_mask)) {
+            printf("Image format cannot be tested.\n");
+            continue;
+        }
+
+        // Query the dispatch limits of the agent. Every query stores its
+        // result in status so that the ASSERT checks the call just made.
+        uint32_t grid_max_size;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_GRID_MAX_SIZE, &grid_max_size);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        uint32_t grid_max_dim[3];
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        uint32_t work_group_max_size;
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &work_group_max_size);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // HSA_AGENT_INFO_WORKGROUP_MAX_DIM is a uint16_t[3] attribute. Read it
+        // at its native width and widen afterwards; reading it straight into
+        // a uint32_t[3] leaves the upper bits and last element undefined.
+        uint16_t work_group_max_dim_query[3];
+        status = hsa_agent_get_info(agent_list.agents[ii], HSA_AGENT_INFO_WORKGROUP_MAX_DIM, work_group_max_dim_query);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        uint32_t work_group_max_dim[3];
+        work_group_max_dim[0] = work_group_max_dim_query[0];
+        work_group_max_dim[1] = work_group_max_dim_query[1];
+        work_group_max_dim[2] = work_group_max_dim_query[2];
+
+        // Find a global memory region for the image backing buffer. The
+        // iterate status is not asserted; success is judged by whether the
+        // callback filled in a handle.
+        hsa_region_t global_region;
+        global_region.handle = (uint64_t) -1;
+        status = hsa_agent_iterate_regions(agent_list.agents[ii], get_global_memory_region_fine_grained, &global_region);
+        ASSERT(global_region.handle != (uint64_t) -1);
+
+        // Find a memory region that supports kernel arguments
+        hsa_region_t kernarg_region;
+        kernarg_region.handle = (uint64_t) -1;
+        hsa_agent_iterate_regions(agent_list.agents[ii], get_kernarg_memory_region, &kernarg_region);
+        ASSERT((uint64_t) -1 != kernarg_region.handle);
+
+        // Get information regarding the image on this agent using the specified
+        // geometry.
+        int image_dimension = 0;
+        uint32_t max_elements[3];
+        uint32_t region_step[3];
+        char* validation_kernel[1];
+
+        get_geometry_info(agent_list.agents[ii],
+                          image_format,
+                          image_geometry,
+                          &image_dimension,
+                          max_elements,
+                          &validation_kernel[0]);
+
+        // Clamp the image extent to the grid limits, then to 1024 x 1024 x 8
+        // to bound the test's run time.
+        max_elements[0] = (max_elements[0] < grid_max_dim[0]) ? max_elements[0] : grid_max_dim[0];
+        max_elements[1] = (max_elements[1] < grid_max_dim[1]) ? max_elements[1] : grid_max_dim[1];
+        max_elements[2] = (max_elements[2] < grid_max_dim[2]) ? max_elements[2] : grid_max_dim[2];
+        max_elements[0] = (max_elements[0] < 1024) ? max_elements[0] : 1024;
+        max_elements[1] = (max_elements[1] < 1024) ? max_elements[1] : 1024;
+        max_elements[2] = (max_elements[2] < 8) ? max_elements[2] : 8;
+        ASSERT((max_elements[0] * max_elements[1] * max_elements[2]) < grid_max_size);
+
+        // Sub-region step: 256 x 256 x 1, never larger than the image itself.
+        region_step[0] = region_step[1] = 256;
+        region_step[2] = 1;
+        region_step[0] = (region_step[0] < max_elements[0]) ? region_step[0] : max_elements[0];
+        region_step[1] = (region_step[1] < max_elements[1]) ? region_step[1] : max_elements[1];
+        region_step[2] = (region_step[2] < max_elements[2]) ? region_step[2] : max_elements[2];
+
+        // Work-group size: at most 16 x 16 x 1 and never larger than the image.
+        work_group_max_dim[0] = (work_group_max_dim[0] < 16) ? work_group_max_dim[0] : 16;
+        work_group_max_dim[1] = (work_group_max_dim[1] < 16) ? work_group_max_dim[1] : 16;
+        work_group_max_dim[2] = (work_group_max_dim[2] < 1) ? work_group_max_dim[2] : 1;
+        work_group_max_dim[0] = (work_group_max_dim[0] < max_elements[0]) ? work_group_max_dim[0] : max_elements[0];
+        work_group_max_dim[1] = (work_group_max_dim[1] < max_elements[1]) ? work_group_max_dim[1] : max_elements[1];
+        work_group_max_dim[2] = (work_group_max_dim[2] < max_elements[2]) ? work_group_max_dim[2] : max_elements[2];
+        ASSERT((work_group_max_dim[0] * work_group_max_dim[1] * work_group_max_dim[2]) <= work_group_max_size);
+
+        // Create a queue to execute validation kernels.
+        hsa_queue_t *queue;
+        status = hsa_queue_create(agent_list.agents[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Finalize the executable
+        hsa_ext_control_directives_t control_directives;
+        memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
+        hsa_code_object_t code_object;
+        hsa_executable_t executable;
+
+        status = finalize_executable(agent_list.agents[ii],
+                                     1,
+                                     &module,
+                                     HSA_MACHINE_MODEL_LARGE,
+                                     HSA_PROFILE_FULL,
+                                     HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     0,
+                                     control_directives,
+                                     &code_object,
+                                     &executable);
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Get the symbol and the symbol info
+        symbol_record_t symbol_record;
+        memset(&symbol_record, 0, sizeof(symbol_record_t));
+
+        status = get_executable_symbols(executable, agent_list.agents[ii], 0, 1, &validation_kernel[0], &symbol_record);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Define the validation kernel arguments
+        typedef struct __attribute__ ((aligned(16))) validate_args_s {
+            hsa_ext_image_t image;   // The image handle
+            void* rgn_values;        // The pixel pattern expected inside the specified region
+            void* bkg_values;        // The pixel pattern expected in the rest of the image
+            uint32_t* start_region;  // The region's starting coords
+            uint32_t* end_region;    // The region's ending coords
+            uint32_t* bits;          // The value returned by get_channel_type_bits for the channel type
+            uint32_t* cmp_mask;      // The channel values to compare
+            uint32_t* error;         // An error field representing different rgba channel errors
+        } validate_args_t;
+
+        // Allocate the kernel argument buffer from the correct region
+        validate_args_t* kernarg_buffer = NULL;
+        status = hsa_memory_allocate(kernarg_region,
+                                     symbol_record.kernarg_segment_size,
+                                     (void**)(&kernarg_buffer));
+
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the pattern buffers. The element type of the four-channel
+        // patterns follows the channel type: uint32_t for unsigned integer
+        // formats, int32_t for signed integer formats, float otherwise.
+        void* bg_pattern;
+        void* clr_pattern;
+
+        if (image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 ||
+            image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 ||
+            image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32) {
+            status = hsa_memory_allocate(global_region, 4 * sizeof(uint32_t), (void**) &bg_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            status = hsa_memory_allocate(global_region, 4 * sizeof(uint32_t), (void**) &clr_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            uint32_t *bg = (uint32_t*) bg_pattern;
+            uint32_t *clr = (uint32_t*) clr_pattern;
+            bg[0] = bg[1] = bg[2] = bg[3] = 0;
+            clr[0] = clr[1] = clr[2] = clr[3] = 255;
+        } else if (image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 ||
+                   image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 ||
+                   image_format->channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32) {
+            status = hsa_memory_allocate(global_region, 4 * sizeof(int32_t), (void**) &bg_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            status = hsa_memory_allocate(global_region, 4 * sizeof(int32_t), (void**) &clr_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            int32_t *bg = (int32_t*) bg_pattern;
+            int32_t *clr = (int32_t*) clr_pattern;
+            bg[0] = bg[1] = bg[2] = bg[3] = 0;
+            clr[0] = clr[1] = clr[2] = clr[3] = 127;
+        } else {
+            status = hsa_memory_allocate(global_region, 4 * sizeof(float), (void**) &bg_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            status = hsa_memory_allocate(global_region, 4 * sizeof(float), (void**) &clr_pattern);
+            ASSERT(HSA_STATUS_SUCCESS == status);
+            float *bg = (float*) bg_pattern;
+            float *clr = (float*) clr_pattern;
+            bg[0] = bg[1] = bg[2] = bg[3] = 0.0f;
+            clr[0] = clr[1] = clr[2] = clr[3] = 0.5f;
+        }
+
+        // Create the start and end region buffers
+        uint32_t* start_region;
+        uint32_t* end_region;
+        status = hsa_memory_allocate(global_region, 3 * sizeof(uint32_t), (void**) &start_region);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, 3 * sizeof(uint32_t), (void**) &end_region);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create the error, mask and bits buffers
+        uint32_t* error;
+        uint32_t* cmp_mask;
+        uint32_t* bits;
+        status = hsa_memory_allocate(global_region, sizeof(uint32_t), (void**) &error);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, sizeof(uint32_t), (void**) &cmp_mask);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_allocate(global_region, sizeof(uint32_t), (void**) &bits);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Add all of the components to the argument buffer
+        *cmp_mask = get_cmp_info(image_format->channel_order);
+        *bits = get_channel_type_bits(image_format->channel_type);
+        kernarg_buffer->rgn_values = clr_pattern;
+        kernarg_buffer->bkg_values = bg_pattern;
+        kernarg_buffer->start_region = start_region;
+        kernarg_buffer->end_region = end_region;
+        kernarg_buffer->bits = bits;
+        kernarg_buffer->cmp_mask = cmp_mask;
+        kernarg_buffer->error = error;
+
+        // Determine the size and alignment for the image backing buffer
+        hsa_ext_image_descriptor_t image_descriptor;
+        image_descriptor.geometry = image_geometry;
+        image_descriptor.width = max_elements[0];
+        image_descriptor.height = max_elements[1];
+        image_descriptor.depth = max_elements[2];
+        image_descriptor.array_size = 1;
+        image_descriptor.format.channel_type = image_format->channel_type;
+        image_descriptor.format.channel_order = image_format->channel_order;
+
+        hsa_ext_image_data_info_t image_info;
+        hsa_access_permission_t access_permissions = HSA_ACCESS_PERMISSION_RW;
+
+        status = pfn.hsa_ext_image_data_get_info(agent_list.agents[ii],
+                                                 &image_descriptor,
+                                                 access_permissions,
+                                                 &image_info);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Verify that the memory region will correctly align the
+        // image data.
+        size_t region_align;
+        status = hsa_region_get_info(global_region, HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT, &region_align);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        ASSERT((region_align >= image_info.alignment) && (region_align % image_info.alignment == 0));
+
+        // Allocate the backing buffer
+        void* image_data;
+        status = hsa_memory_allocate(global_region, image_info.size, (void**) &image_data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Allocate the export buffer
+        void* export_buffer;
+        status = hsa_memory_allocate(global_region, image_info.size, (void**) &export_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create an image with the backing buffer.
+        status = pfn.hsa_ext_image_create(agent_list.agents[ii],
+                                          &image_descriptor,
+                                          image_data,
+                                          access_permissions,
+                                          &(kernarg_buffer->image));
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Create a completion signal
+        hsa_signal_t completion_signal;
+        status = hsa_signal_create(1, 0, NULL, &completion_signal);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Setup the dispatch packet
+        hsa_kernel_dispatch_packet_t dispatch_packet;
+        memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));
+        dispatch_packet.setup |= image_dimension << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+        dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+        dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+        dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+        dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER;
+        dispatch_packet.kernel_object = symbol_record.kernel_object;
+        dispatch_packet.group_segment_size = symbol_record.group_segment_size;
+        dispatch_packet.private_segment_size = symbol_record.private_segment_size;
+        dispatch_packet.kernarg_address = (void*) kernarg_buffer;
+        dispatch_packet.completion_signal = completion_signal;
+        dispatch_packet.workgroup_size_x = work_group_max_dim[0];
+        dispatch_packet.workgroup_size_y = work_group_max_dim[1];
+        dispatch_packet.workgroup_size_z = work_group_max_dim[2];
+        dispatch_packet.grid_size_x = max_elements[0];
+        dispatch_packet.grid_size_y = max_elements[1];
+        dispatch_packet.grid_size_z = max_elements[2];
+
+        // Define the regions.
+        hsa_ext_image_region_t region_all;
+        hsa_ext_image_region_t region_partial;
+        region_all.offset.x = 0;
+        region_all.offset.y = 0;
+        region_all.offset.z = 0;
+        region_all.range.x = max_elements[0];
+        region_all.range.y = max_elements[1];
+        region_all.range.z = max_elements[2];
+
+        size_t x_offset, y_offset, z_offset;
+
+        // Walk the image in sub-regions, validating the export/import
+        // round trip for each one. In every dimension the range is a full
+        // step, or the remainder when fewer than a step of elements is left.
+        for (x_offset = 0; x_offset < max_elements[0]; x_offset += region_step[0]) {
+            region_partial.offset.x = x_offset;
+            region_partial.range.x = (region_step[0] <= (max_elements[0] - x_offset)) ? region_step[0] : max_elements[0] % region_step[0];
+            if (region_partial.range.x <= 0) {
+                continue;
+            }
+            start_region[0] = region_partial.offset.x;
+            end_region[0] = start_region[0] + region_partial.range.x;
+
+            for (y_offset = 0; y_offset < max_elements[1]; y_offset += region_step[1]) {
+                region_partial.offset.y = y_offset;
+                region_partial.range.y = (region_step[1] <= (max_elements[1] - y_offset)) ? region_step[1] : max_elements[1] % region_step[1];
+                if (region_partial.range.y <= 0) {
+                    continue;
+                }
+                start_region[1] = region_partial.offset.y;
+                end_region[1] = start_region[1] + region_partial.range.y;
+
+                for (z_offset = 0; z_offset < max_elements[2]; z_offset += region_step[2]) {
+                    region_partial.offset.z = z_offset;
+                    // Uses the z offset and z extent; the earlier code used
+                    // y_offset and max_elements[1] here.
+                    region_partial.range.z = (region_step[2] <= (max_elements[2] - z_offset)) ? region_step[2] : max_elements[2] % region_step[2];
+                    if (region_partial.range.z <= 0) {
+                        continue;
+                    }
+                    start_region[2] = region_partial.offset.z;
+                    end_region[2] = start_region[2] + region_partial.range.z;
+
+                    // Clear the entire image to the bg_pattern.
+                    status = pfn.hsa_ext_image_clear(agent_list.agents[ii],
+                                                     kernarg_buffer->image,
+                                                     bg_pattern,
+                                                     &region_all);
+
+                    ASSERT(HSA_STATUS_SUCCESS == status);
+
+                    // Clear the partial region of the image with the clr_pattern.
+                    status = pfn.hsa_ext_image_clear(agent_list.agents[ii],
+                                                     kernarg_buffer->image,
+                                                     clr_pattern,
+                                                     &region_partial);
+
+                    ASSERT(HSA_STATUS_SUCCESS == status);
+
+                    // Export the entire image to a buffer
+                    size_t dst_row_pitch = region_all.range.x;
+                    size_t dst_slice_pitch = region_all.range.x * region_all.range.y;
+
+                    status = pfn.hsa_ext_image_export(agent_list.agents[ii],
+                                                      kernarg_buffer->image,
+                                                      export_buffer,
+                                                      dst_row_pitch,
+                                                      dst_slice_pitch,
+                                                      &region_all);
+
+                    ASSERT(HSA_STATUS_SUCCESS == status);
+
+                    // Clear the image so that the import below is the only
+                    // source of the pattern the kernel validates.
+                    status = pfn.hsa_ext_image_clear(agent_list.agents[ii],
+                                                     kernarg_buffer->image,
+                                                     bg_pattern,
+                                                     &region_all);
+
+                    ASSERT(HSA_STATUS_SUCCESS == status);
+
+                    // Import the entire image from the buffer
+                    status = pfn.hsa_ext_image_import(agent_list.agents[ii],
+                                                      export_buffer,
+                                                      dst_row_pitch,
+                                                      dst_slice_pitch,
+                                                      kernarg_buffer->image,
+                                                      &region_all);
+
+                    ASSERT(HSA_STATUS_SUCCESS == status);
+
+                    *kernarg_buffer->error = 0;
+
+                    // Dispatch the kernel
+                    enqueue_dispatch_packet(queue, &dispatch_packet);
+
+                    // Wait on the completion signal
+                    hsa_signal_value_t value;
+                    do {
+                        value = hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+                    } while (0 != value);
+
+                    // Verify that no errors occurred.
+                    if (0 != *kernarg_buffer->error) {
+                        printf("\nRegion: (%u,%u,%u) -> (%u,%u,%u) Error: %u\n",
+                               start_region[0],
+                               start_region[1],
+                               start_region[2],
+                               end_region[0],
+                               end_region[1],
+                               end_region[2],
+                               *kernarg_buffer->error);
+                        ASSERT(0 == *kernarg_buffer->error);
+                    }
+
+                    printf(".");
+
+                    // Reset the signal value
+                    hsa_signal_store_release(completion_signal, 1);
+                }
+            }
+        }
+
+        // Destroy the completion signal
+        status = hsa_signal_destroy(completion_signal);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the image
+        status = pfn.hsa_ext_image_destroy(agent_list.agents[ii], kernarg_buffer->image);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the backing buffer
+        status = hsa_memory_free(image_data);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the export buffer
+        status = hsa_memory_free(export_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the kernarg_buffer
+        status = hsa_memory_free(kernarg_buffer);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the executable
+        status = hsa_executable_destroy(executable);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the code object
+        status = hsa_code_object_destroy(code_object);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free the clear and background patterns
+        status = hsa_memory_free(bg_pattern);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(clr_pattern);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Free all the utility buffers
+        status = hsa_memory_free(start_region);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(end_region);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(error);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(cmp_mask);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+        status = hsa_memory_free(bits);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+
+        // Destroy the queue
+        status = hsa_queue_destroy(queue);
+        ASSERT(HSA_STATUS_SUCCESS == status);
+    }
+
+    // Shutdown HSA
+    status = hsa_shut_down();
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    return 0;
+}
diff --git a/src/kernels/agent_dispatch.brig b/src/kernels/agent_dispatch.brig new
file mode 100644 index 0000000..66bd6f5 Binary files /dev/null and b/src/kernels/agent_dispatch.brig differ diff --git a/src/kernels/agent_dispatch.hsail b/src/kernels/agent_dispatch.hsail new file mode 100644 index 0000000..9115394 --- /dev/null +++ b/src/kernels/agent_dispatch.hsail @@ -0,0 +1,136 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &agent_dispatch:1:0:$full:$large:$default; + +decl prog function &abort()(); +/* Each work-item reserves one slot in the service queue and writes an agent dispatch packet into it (header last); work-item 0 then stores to the queue's doorbell signal. */ +prog kernel &__agent_dispatch_kernel( + kernarg_u64 %return_addresses, + kernarg_u64 %dispatch_args, + kernarg_u64 %signal_handle, + kernarg_u64 %count, + kernarg_u64 %service_queue) { +@__agent_dispatch_kernel_entry: + // BB#0: // %entry + ld_kernarg_align(8)_width(all)_u64 $d0, [%service_queue]; // d0 has the queue address + + // this is used to help to compute the greatest write_index for the doorbell signal. + // write_index (for doorbell) = base write_index (before sync) + %count.
+ ldqueuewriteindex_global_rlx_u64 $d10, [$d0]; // d10 has the queue's write index + + // sync workitems + barrier; + + addqueuewriteindex_global_rlx_u64 $d1, [$d0], 1; // d1 has the queue's write_index; used below as this work-item's packet slot + add_u64 $d3, $d0, 8; // d3 has the address of AQL packets base address + add_u64 $d4, $d0, 24; // d4 has the address of the size of the queue + ld_global_s32 $s1, [$d4]; // s1 has the size of the queue + + + // mod the write_index + mov_b32 $s2, 0; + combine_v2_b64_b32 $d5, ($s1, $s2); // convert the size from 32-bit to 64-bit + rem_u64 $d6, $d1, $d5; // d6 has the index where a new packet is to be written at + // a dispatch packet is 64 (2^6) bytes + shl_u64 $d5, $d6, 6; // d5 now has the byte-offset at which a new packet is to be written + ld_global_u64 $d6, [$d3]; // d6 now has the base address of AQL packets + add_u64 $d7, $d6, $d5; // d7 has the address where a new packet is to be written at + + // Packet: 32-63 bits + // reserved 2, must be 0 + mov_b32 $s2, 0; + st_global_u32 $s2, [$d7+4]; + + // use the work ID to select the proper return address and args + workitemflatabsid_u64 $d13; + shl_u64 $d13, $d13, 3; // d13 is the offset to index the proper arguments + + // Packet: return location + ld_kernarg_align(8)_width(all)_u64 $d8, [%return_addresses]; + add_u64 $d8, $d8, $d13; + st_global_u64 $d8, [$d7+8]; + + // Packet: args + ld_kernarg_align(8)_width(all)_u64 $d8, [%dispatch_args]; + add_u64 $d8, $d8, $d13; + ld_global_u64 $d9, [$d8]; + st_global_u64 $d9, [$d7+16]; // args[0] = ld [%dispatch_args + offset ($d13)] + mov_b64 $d8, 0; + st_global_u64 $d8, [$d7+24]; // args[1] = 0 + st_global_u64 $d8, [$d7+32]; // args[2] = 0 + st_global_u64 $d8, [$d7+40]; // args[3] = 0 + + // Packet: reserved 3 + mov_b64 $d8, 0; + st_global_u64 $d8, [$d7+48]; + + // Packet: completion signal + ld_kernarg_align(8)_width(all)_u64 $d8, [%signal_handle]; + st_global_u64 $d8, [$d7+56]; + + // Packet: 0-31 bits + // agent_dispatch type (upper 16 bits) + packet header (lower 16
bits) + // 000 10 10 1 00000100 --> 0x1504 + // --- -- -- - -------- + // Reserved Fence Scope Barrier packet type (agent dispatch) + mov_b32 $s2, 0x80001504; // 0x8000 in the upper 16 bits (type), 0x1504 in the lower 16 bits (header) + st_global_u32 $s2, [$d7]; // the first 32 bits are written after the rest of the packet + + // only the 1st workitem rings the doorbell with the greatest write_index + workitemflatabsid_u32 $s1; + cmp_gt_b1_u32 $c0, $s1, 0; + cbr_b1 $c0, @BB0_2; + + // signal the doorbell (the 1st workitem only) + ld_global_sig64 $d9, [$d0+16]; // d9 register is the signal handle + ld_kernarg_align(8)_width(all)_u64 $d11, [%count]; // total number of work items + add_u64 $d12, $d10, $d11; // the signal value = base write_index ($d10) + count ($d11); NOTE(review): presumably one past the last slot reserved above - confirm the queue consumer expects that + signalnoret_st_rlx_b64_sig64 $d9, $d12; + +@BB0_2: + // %return + ret; +}; diff --git a/src/kernels/control_device.brig b/src/kernels/control_device.brig new file mode 100644 index 0000000..a7f4089 Binary files /dev/null and b/src/kernels/control_device.brig differ diff --git a/src/kernels/control_device.hsail b/src/kernels/control_device.hsail new file mode 100644 index 0000000..74d0de7 --- /dev/null +++ b/src/kernels/control_device.hsail @@ -0,0 +1,122 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &control_device:1:0:$full:$large:$default; + +/* + * Kernels: control_device + * + * Description: The control_device kernel has definitions of several + * control directives for low-level performance tuning.
+ * + * Pseudo code: + * + * __kernel void no_op() { + * return; + * } + * + */ + +decl prog function &abort()(); + +decl prog function &control_device_fn()(); +/* Kernel entry point: its only work is a call to &control_device_fn. */ +prog kernel &__control_device_kernel() { +@__control_device_kernel_entry: + // BB#0: + // Start the argument scope + { + call &control_device_fn()(); + } + ret; +}; +/* Function whose body is only control directives followed by ret; every directive after maxdynamicgroupsize is stated twice with identical values. */ +prog function &control_device_fn()() +{ + maxdynamicgroupsize 4096; // set to 4K + //----------------------------------------------------------------------------------------------------------------------- + // Must be an immediate value greater than 0, or WAVESIZE. + // If multiple control directives are present in the kernel or the functions it calls, they must all have the same values. + // The value specified must be greater than or equal to the product of the values specified by requiredgridsize + //----------------------------------------------------------------------------------------------------------------------- + maxflatgridsize 1000; + maxflatgridsize 1000; + //----------------------------------------------------------------------------------------------------------------------- + // Must be an immediate value greater than 0, or WAVESIZE. + // If multiple control directives are present in the kernel or the functions it calls, they must all have the same values. + // The value specified must also be greater than or equal to the product of the values specified by requiredworkgroupsize. + //----------------------------------------------------------------------------------------------------------------------- + maxflatworkgroupsize 100; + maxflatworkgroupsize 100; + //----------------------------------------------------------------------------------------------------------------------- + // Must be an immediate value with the value 1, 2, or 3. + // If multiple control directives are present in the kernel or the functions it calls, they must all have the same values.
+ // must be consistent with requiredgridsize and requiredworkgroupsize: + // if requireddim = 1, => requiredgridsize X, 1, 1; requiredworkgroupsize X, 1, 1; + // if requireddim = 2, => requiredgridsize X, Y, 1; requiredworkgroupsize X, Y, 1; + //----------------------------------------------------------------------------------------------------------------------- + requireddim 3; + requireddim 3; + //----------------------------------------------------------------------------------------------------------------------- + // Must be an immediate value greater than 0, or WAVESIZE. + // If multiple control directives are present in the kernel or the functions it calls, they must all have the same values. + // The product of the values must be less than or equal to value specified by maxflatgridsize. NOTE(review): 1000*1000*1000 below exceeds the maxflatgridsize of 1000 declared above - confirm this is intended. + // Must be consistent with requireddim + //----------------------------------------------------------------------------------------------------------------------- + requiredgridsize 1000, 1000, 1000; + requiredgridsize 1000, 1000, 1000; + //----------------------------------------------------------------------------------------------------------------------- + // Must be an immediate value greater than 0, or WAVESIZE. + // If multiple control directives are present in the kernel or the functions it calls, they must all have the same values. + // The product of the values must be less than or equal to any value specified by maxflatworkgroupsize. NOTE(review): 100*100*100 below exceeds the maxflatworkgroupsize of 100 declared above - confirm this is intended.
+ // Must be consistent with requireddim + //----------------------------------------------------------------------------------------------------------------------- + requiredworkgroupsize 100, 100, 100; + requiredworkgroupsize 100, 100, 100; + //----------------------------------------------------------------------------------------------------------------------- + +ret; +}; + diff --git a/src/kernels/depend_module1.brig b/src/kernels/depend_module1.brig new file mode 100644 index 0000000..daec4c4 Binary files /dev/null and b/src/kernels/depend_module1.brig differ diff --git a/src/kernels/depend_module1.hsail b/src/kernels/depend_module1.hsail new file mode 100644 index 0000000..ee57ee9 --- /dev/null +++ b/src/kernels/depend_module1.hsail @@ -0,0 +1,75 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &depend_module1:1:0:$full:$large:$default; + +/* + * Kernels: depend_module1 + * + * Description: The module includes element_add_fn definition, will be called + * by the other module, to test the dependency. 
+ * + * Psuedo code: + * declare prog function &element_add()(); + * __kernel void no_op() { + * return; + * } + * + */ +/* Writes the 32-bit unsigned sum of %in0 and %in1 to the output argument %out. */ +prog function &element_add_fn(arg_u32 %out)(arg_u32 %in0, arg_u32 %in1) +{ + ld_arg_u32 $s0, [%in0]; /* $s0 = in0 */ + ld_arg_u32 $s1, [%in1]; /* $s1 = in1 */ + add_u32 $s2, $s0, $s1; /* $s2 = in0 + in1 */ + st_arg_u32 $s2, [%out]; /* out = $s2 */ + ret; +}; +/* Empty kernel: returns immediately. */ +prog kernel &__no_op_kernel() { +@__no_op_kernel_entry: + // BB#0: // %entry + ret; +}; diff --git a/src/kernels/depend_module2.brig b/src/kernels/depend_module2.brig new file mode 100644 index 0000000..cd3c2fa Binary files /dev/null and b/src/kernels/depend_module2.brig differ diff --git a/src/kernels/depend_module2.hsail b/src/kernels/depend_module2.hsail new file mode 100644 index 0000000..6c25f3d --- /dev/null +++ b/src/kernels/depend_module2.hsail @@ -0,0 +1,103 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &depend_module2:1:0:$full:$large:$default; + +/* + * Kernels: depend_module2 + * + * Description: The module2 depends on module1 to process + * + * Pseudo code: + * + * __kernel void element_add_kernel( + * __global int* in0, + * __global int* in1, + * __global int* out) { + * call element_add(out)(in0,in1); + * } + * + */ + +decl prog function &abort()(); + +decl prog function &element_add_fn(arg_u32 %out)(arg_u32 %in0, arg_u32 %in1); +/* Per work-item: loads in0[i] and in1[i], calls &element_add_fn (only declared in this module) and stores its result to out[i]; i is the absolute id in dimension 0. */ +prog kernel &__element_add_kernel( + kernarg_u64 %in0, + kernarg_u64 %in1, + kernarg_u64 %out + ) { +@__element_add_kernel_entry: + // BB#0: // %entry + workitemabsid_u32 $s0, 0; //obtain the work_item absolute ID within the entire grid + cvt_u64_u32 $d0, $s0; //convert 32 bits to 64 bits, stored in $d0 + shl_u64 $d1, $d0, 2; //shift $d0 left by 2 bit, store in $d1, for 32 bit integers + ld_kernarg_align(8)_width(all)_u64 $d0, [%out];//load the base address of out to $d0 + add_u64 $d0, $d0, $d1; //calculate the address of the output element + ld_kernarg_align(8)_width(all)_u64 $d2, [%in0]; //load the base address of in0 to $d2 + add_u64 $d2, $d2, $d1; //calculate the address of the in0 element; in1 is handled the same way through $d3 +
ld_kernarg_align(8)_width(all)_u64 $d3, [%in1]; + add_u64 $d3, $d3, $d1; + + //start argument scope + { + arg_u32 %sum; + arg_u32 %arg0; + arg_u32 %arg1; + ld_global_align(4)_u32 $s0, [$d0]; //current value of the out element; it is stored into %sum before the call + ld_global_align(4)_u32 $s1, [$d2]; //in0 element + ld_global_align(4)_u32 $s2, [$d3]; //in1 element + //fill in the arguments + st_arg_u32 $s0, [%sum]; + st_arg_u32 $s1, [%arg0]; + st_arg_u32 $s2, [%arg1]; + call &element_add_fn(%sum)(%arg0,%arg1); + ld_arg_align(4)_u32 $s0, [%sum]; //read back the function's output argument + } + //end of argument scope + st_global_align(4)_u32 $s0, [$d0]; //store the result to the out vector + ret; +}; diff --git a/src/kernels/global_agent_vector_copy.brig b/src/kernels/global_agent_vector_copy.brig new file mode 100644 index 0000000..c0d1bac Binary files /dev/null and b/src/kernels/global_agent_vector_copy.brig differ diff --git a/src/kernels/global_agent_vector_copy.hsail b/src/kernels/global_agent_vector_copy.hsail new file mode 100644 index 0000000..5048893 --- /dev/null +++ b/src/kernels/global_agent_vector_copy.hsail @@ -0,0 +1,64 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +module &global_vector_copy:1:0:$full:$large:$default; + +decl prog function &abort()(); + +decl prog alloc(agent) global_u32 &b[]; +/* Copies a[i] into the agent-allocated program global b[i], where i is the work-item's absolute id in dimension 0. */ +prog kernel &__global_vector_copy_kernel( + kernarg_u64 %a) { +@__global_vector_copy_kernel_entry: + // BB#0: // %entry + workitemabsid_u32 $s1, 0; // i = absolute work-item id (dimension 0) + cvt_u64_u32 $d0, $s1; // zero-extend: the id is unsigned, so it must not be sign-extended + shl_u64 $d1, $d0, 2; // byte offset of element i (4-byte elements) + ld_kernarg_align(8)_width(all)_u64 $d2, [%a]; // base address of a + add_u64 $d3, $d2, $d1; // address of a[i] + ld_global_u32 $s0, [$d3]; // $s0 = a[i] + st_global_u32 $s0, [&b][$d1]; // b[i] = a[i]; the loaded value is stored, not the work-item id in $s1 + ret; +}; diff --git a/src/kernels/global_vector_copy.brig b/src/kernels/global_vector_copy.brig new file mode 100644 index 0000000..141b000 Binary files /dev/null and b/src/kernels/global_vector_copy.brig differ diff --git a/src/kernels/global_vector_copy.hsail b/src/kernels/global_vector_copy.hsail new file mode 100755 index 0000000..cb2f1ee --- /dev/null +++ b/src/kernels/global_vector_copy.hsail @@ -0,0 +1,64 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &global_vector_copy:1:0:$full:$large:$default; + +decl prog function &abort()(); + +decl prog global_u32 &b[]; +/* Copies a[i] into the program global b[i], where i is the work-item's absolute id in dimension 0. */ +prog kernel &__global_vector_copy_kernel( + kernarg_u64 %a) { +@__global_vector_copy_kernel_entry: + // BB#0: // %entry + workitemabsid_u32 $s1, 0; // i = absolute work-item id (dimension 0) + cvt_u64_u32 $d0, $s1; // zero-extend: the id is unsigned, so it must not be sign-extended + shl_u64 $d1, $d0, 2; // byte offset of element i (4-byte elements) + ld_kernarg_align(8)_width(all)_u64 $d2, [%a]; // base address of a + add_u64 $d3, $d2, $d1; // address of a[i] + ld_global_u32 $s0, [$d3]; // $s0 = a[i] + st_global_u32 $s0, [&b][$d1]; // b[i] = a[i]; the loaded value is stored, not the work-item id in $s1 + ret; +}; diff --git a/src/kernels/group_memory.brig b/src/kernels/group_memory.brig new file mode 100644 index 0000000..0da4494 Binary files /dev/null and b/src/kernels/group_memory.brig differ diff --git a/src/kernels/group_memory.hsail b/src/kernels/group_memory.hsail new file mode 100644 index 0000000..a2745bb --- /dev/null +++ b/src/kernels/group_memory.hsail @@ -0,0 +1,207 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of
Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +module &group_memory:1:0:$full:$large:$default; + +decl prog function &abort()(); + +/* + * Kernels: group_memory_static_kernel + * + * Description: The purpose of the group memory kernel + * is to generate a BRIG file that requires group memory. + * + * Pseudo code: + * + * __kernel void group_memory_static(__global uint *in, + * __global uint *out, + * __private uint count) { + * __private uint gid; + * __local uint grp[256]; + * + * gid = get_global_id(0); + * + * if(lid > count) { + * return; + * } + * + * grp[gid] = in[gid]; + * + * barrier(CLK_LOCAL_MEM_FENCE); + * + * out[gid] = grp[gid]; + * + * return; + * } + * + */ + +prog kernel &__group_memory_static_kernel( + kernarg_u64 %in, + kernarg_u64 %out, + kernarg_u32 %count) { + + group_u32 %grp[256]; + +@__group_memory_static_kernel_entry: + // BB#0: // %entry + ld_kernarg_align(4)_width(all)_u32 $s0, [%count]; // $s0 = count + workitemid_u32 $s1, 0; // $s1 = the pseudo code's lid (work-item id within the work-group, dimension 0) + cmp_gt_b1_u32 $c0, $s1, $s0; + cbr_b1 $c0, @BB0_2; // lid > count: return early; NOTE(review): such work-items never reach the barrier below - confirm the dispatch never takes this path for only part of a work-group + // BB#1: // %if.end + ld_kernarg_align(8)_width(all)_u64 $d0, [%in]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%out]; + workitemabsid_u32 $s0, 0; // $s0 = gid (absolute id, dimension 0) + cvt_u64_u32 $d2, $s0; + shl_u64 $d2, $d2, 2; // 64-bit byte offset gid*4 used for in and out + shl_u32 $s0, $s0, 2; // 32-bit byte offset gid*4 used for %grp; NOTE(review): %grp has 256 elements, so this presumably assumes gid < 256 - confirm against the dispatching test + add_u64 $d0, $d0, $d2; // address of in[gid] + add_u64 $d1, $d1, $d2; // address of out[gid] + + //Load the global data. + ld_global_u32 $s1, [$d0]; + + //Store the value to group memory. + st_group_u32 $s1, [%grp][$s0]; + + barrier; + + //Load the value from group memory. + ld_group_u32 $s1, [%grp][$s0]; + + //Store the value to global memory. + st_global_u32 $s1, [$d1]; + +@BB0_2: + // %return + ret; +}; + +/* + * Kernels: group_memory_dynamic_kernel + * + * Description: The purpose of the group memory kernel + * is to generate a BRIG file that requires dynamic group memory.
+ * + * Psuedo code: + * + * __kernel void group_memory_dynamic(__global uint *in, + * __global uint *out, + * __local uint *grp_offset, + * __private uint count) { + * __private uint gid; + * + * gid = get_global_id(0); + * + * if(lid > count) { + * return; + * } + * + * grp_offset[gid] = in[gid]; + * + * barrier(CLK_LOCAL_MEM_FENCE); + * + * out[gid] = grp_offset[gid]; + * + * return; + * } + * + */ + +prog kernel &__group_memory_dynamic_kernel( + kernarg_u64 %in, + kernarg_u64 %out, + kernarg_u32 %grp_offset, + kernarg_u32 %count) { + + // Define some static group memory so the offset is not 0. + group_u32 %grp[256]; + +@__group_memory_dynamic_kernel_entry: + // BB#0: // %entry + ld_kernarg_align(4)_width(all)_u32 $s0, [%count]; // $s0 = count + workitemid_u32 $s1, 0; // $s1 = the pseudo code's lid (work-item id within the work-group, dimension 0) + cmp_gt_b1_u32 $c0, $s1, $s0; + cbr_b1 $c0, @BB0_2; // lid > count: return early, skipping the barrier below + // BB#1: // %if.end + ld_kernarg_align(8)_width(all)_u64 $d0, [%in]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%out]; + ld_kernarg_align(8)_width(all)_u32 $s2, [%grp_offset]; // NOTE(review): align(8) is asserted for this 32-bit kernarg while %count uses align(4) - confirm + workitemabsid_u32 $s0, 0; // $s0 = gid (absolute id, dimension 0) + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 2; // 64-bit byte offset gid*4 used for in and out + shl_u32 $s1, $s0, 2; // 32-bit byte offset gid*4 used for group memory + add_u64 $d0, $d0, $d3; // address of in[gid] + add_u64 $d1, $d1, $d3; // address of out[gid] + + //Get the base pointer to group memory. + groupbaseptr_u32 $s3; + + //Add the offset to the base pointer. + add_u32 $s2, $s2, $s3; + + //Add the gid offset to the specific work-items + //group memory element. + add_u32 $s2, $s2, $s1; + + //Load the global data. + ld_global_u32 $s1, [$d0]; + + //Store the data into group memory. + st_group_u32 $s1, [$s2]; + + barrier; + + //Load the data from group memory. + ld_group_u32 $s1, [$s2]; + + //Store the data to global memory.
+ st_global_u32 $s1, [$d1]; + +@BB0_2: + // %return + ret; +}; diff --git a/src/kernels/init_data.brig b/src/kernels/init_data.brig new file mode 100644 index 0000000..2ffedbb Binary files /dev/null and b/src/kernels/init_data.brig differ diff --git a/src/kernels/init_data.hsail b/src/kernels/init_data.hsail new file mode 100644 index 0000000..4def97d --- /dev/null +++ b/src/kernels/init_data.hsail @@ -0,0 +1,193 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+module &init_data:1:0:$full:$large:$default;
+
+/*
+ * Kernels: init_TYPE_data (TYPE is int, float or double)
+ *
+ * Description: The init_TYPE_data kernels initialize a
+ * logical 1d, 2d or 3d block of data using a specified
+ * value. The row_pitch parameter specifies the size of
+ * each logical row and the slice_pitch parameter specifies
+ * the size of each logical 2D sheet. Kernels for int, float
+ * and double are provided.
+ *
+ * Pseudo code:
+ *
+ * __kernel void init_TYPE_data(
+ *     __global TYPE *data,
+ *     TYPE value,
+ *     uint row_pitch,
+ *     uint slice_pitch) {
+ *   uint dims = get_work_dim();
+ *   size_t idx = get_global_id(0);
+ *   idx += (dims > 1) ? (get_global_id(1) * row_pitch) : 0;
+ *   idx += (dims > 2) ? (get_global_id(2) * slice_pitch) : 0;
+ *   data[idx] = value;
+ *   return;
+ * }
+ *
+ */
+
+decl prog function &abort()(); // declared only; none of the kernels in this file call it
+
+prog kernel &__init_int_data_kernel(
+  kernarg_u64 %data,
+  kernarg_u32 %value,
+  kernarg_u32 %row_pitch,
+  kernarg_u32 %slice_pitch) {
+@__init_int_data_kernel_entry:
+  // BB#0: // %entry
+  mov_b64 $d2, 0; // slice term stays 0 unless the dispatch has 3 dimensions
+  dim_u32 $s1; // $s1 = number of dispatch dimensions
+  workitemabsid_u32 $s0, 0;
+  cvt_u64_u32 $d1, $s0; // idx = absolute id in dimension 0
+  ld_kernarg_align(4)_width(all)_u32 $s0, [%value]; // $s0 is reused for the fill value
+  ld_kernarg_align(8)_width(all)_u64 $d0, [%data];
+  cmp_lt_b1_u32 $c0, $s1, 2;
+  cbr_b1 $c0, @BB0_3; // 1D dispatch: no row or slice term
+  // BB#1: // %cond.end
+  ld_kernarg_align(4)_width(all)_u32 $s2, [%row_pitch];
+  cvt_u64_u32 $d3, $s2;
+  workitemabsid_u32 $s2, 1;
+  cvt_u64_u32 $d4, $s2;
+  mad_u64 $d1, $d4, $d3, $d1; // idx += id(1) * row_pitch
+  cmp_lt_b1_u32 $c0, $s1, 3;
+  cbr_b1 $c0, @BB0_3; // 2D dispatch: no slice term
+  // BB#2: // %cond.true5
+  ld_kernarg_align(4)_width(all)_u32 $s1, [%slice_pitch];
+  cvt_u64_u32 $d2, $s1;
+  workitemabsid_u32 $s1, 2;
+  cvt_u64_u32 $d3, $s1;
+  mul_u64 $d2, $d3, $d2; // slice term = id(2) * slice_pitch
+
+@BB0_3:
+  // %cond.end10
+  add_u64 $d1, $d2, $d1; // idx += slice term
+  shl_u64 $d1, $d1, 2; // byte offset: idx * 4 (32-bit elements)
+  add_u64 $d0, $d0, $d1; // address of data[idx]
+  st_global_u32 $s0, [$d0];
+  ret;
+};
+
+prog kernel &__init_float_data_kernel(
+  kernarg_u64 %data,
+  kernarg_f32 %value,
+  kernarg_u32 %row_pitch,
+  kernarg_u32 %slice_pitch) {
+@__init_float_data_kernel_entry:
+  // BB#0: // %entry
+  mov_b64 $d2, 0; // slice term stays 0 unless the dispatch has 3 dimensions
+  dim_u32 $s1; // $s1 = number of dispatch dimensions
+  workitemabsid_u32 $s0, 0;
+  cvt_u64_u32 $d1, $s0; // idx = absolute id in dimension 0
+  ld_kernarg_align(4)_width(all)_f32 $s0, [%value]; // $s0 is reused for the fill value
+  ld_kernarg_align(8)_width(all)_u64 $d0, [%data];
+  cmp_lt_b1_u32 $c0, $s1, 2;
+  cbr_b1 $c0, @BB1_3; // 1D dispatch: no row or slice term
+  // BB#1: // %cond.end
+  ld_kernarg_align(4)_width(all)_u32 $s2, [%row_pitch];
+  cvt_u64_u32 $d3, $s2;
+  workitemabsid_u32 $s2, 1;
+  cvt_u64_u32 $d4, $s2;
+  mad_u64 $d1, $d4, $d3, $d1; // idx += id(1) * row_pitch
+  cmp_lt_b1_u32 $c0, $s1, 3;
+  cbr_b1 $c0, @BB1_3; // 2D dispatch: no slice term
+  // BB#2: // %cond.true5
+  ld_kernarg_align(4)_width(all)_u32 $s1, [%slice_pitch];
+  cvt_u64_u32 $d2, $s1;
+  workitemabsid_u32 $s1, 2;
+  cvt_u64_u32 $d3, $s1;
+  mul_u64 $d2, $d3, $d2; // slice term = id(2) * slice_pitch
+
+@BB1_3:
+  // %cond.end10
+  add_u64 $d1, $d2, $d1; // idx += slice term
+  shl_u64 $d1, $d1, 2; // byte offset: idx * 4 (32-bit elements)
+  add_u64 $d0, $d0, $d1; // address of data[idx]
+  st_global_f32 $s0, [$d0];
+  ret;
+};
+
+prog kernel &__init_double_data_kernel(
+  kernarg_u64 %data,
+  kernarg_f64 %value,
+  kernarg_u32 %row_pitch,
+  kernarg_u32 %slice_pitch) {
+@__init_double_data_kernel_entry:
+
+// BB#0: // %entry
+  mov_b64 $d3, 0; // slice term stays 0 unless the dispatch has 3 dimensions
+  dim_u32 $s0; // $s0 = number of dispatch dimensions
+  workitemabsid_u32 $s1, 0;
+  cvt_u64_u32 $d2, $s1; // idx = absolute id in dimension 0
+  ld_kernarg_align(8)_width(all)_f64 $d0, [%value]; // the 64-bit fill value lives in $d0, so the base address uses $d1
+  ld_kernarg_align(8)_width(all)_u64 $d1, [%data];
+  cmp_lt_b1_u32 $c0, $s0, 2;
+  cbr_b1 $c0, @BB2_3; // 1D dispatch: no row or slice term
+  // BB#1: // %cond.end
+  ld_kernarg_align(4)_width(all)_u32 $s1, [%row_pitch];
+  cvt_u64_u32 $d4, $s1;
+  workitemabsid_u32 $s1, 1;
+  cvt_u64_u32 $d5, $s1;
+  mad_u64 $d2, $d5, $d4, $d2; // idx += id(1) * row_pitch
+  cmp_lt_b1_u32 $c0, $s0, 3;
+  cbr_b1 $c0, @BB2_3; // 2D dispatch: no slice term
+  // BB#2: // %cond.true5
+  ld_kernarg_align(4)_width(all)_u32 $s0, [%slice_pitch];
+  cvt_u64_u32 $d3, $s0;
+  workitemabsid_u32 $s0, 2;
+  cvt_u64_u32 $d4, $s0;
+  mul_u64 $d3, $d4, $d3; // slice term = id(2) * slice_pitch
+
+@BB2_3:
+  // %cond.end10
+  add_u64 $d2, $d3, $d2; // idx += slice term
+  shl_u64 $d2, $d2, 3; //f64 elements are 8 bytes wide, so shift left by 3 to turn the index into a byte offset
+  add_u64 $d1, $d1, $d2; //add the byte offset to the base address to get the element's address
+  st_global_f64 $d0, [$d1];
+  ret;
+};
diff --git a/src/kernels/kernarg_align.brig b/src/kernels/kernarg_align.brig
new file mode 100644
index 0000000..4c6c91d
Binary files /dev/null and b/src/kernels/kernarg_align.brig differ
diff --git a/src/kernels/kernarg_align.hsail b/src/kernels/kernarg_align.hsail
new file mode 100644
index 0000000..cf3aab7
--- /dev/null
+++ b/src/kernels/kernarg_align.hsail
@@ -0,0 +1,3793 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+module &kernarg_align:1:0:$full:$large:$default; // kernels below are named __kernarg_A_u64_B_T_kernel: A = align of %arg_first, B = align of %arg_second, T = type of %arg_second
+
+decl prog function &abort()();
+
+/*
+ * Kernel name: __kernarg_8_u64_8_u64_kernel
+ */
+
+prog kernel &__kernarg_8_u64_8_u64_kernel(
+  kernarg_u64 %arg_output, // base address of the u64 output buffer
+  align(8) kernarg_u64 %arg_first, // first operand, declared with 8-byte alignment
+  align(8) kernarg_u64 %arg_second) { // second operand, declared with 8-byte alignment
+@__kernarg_8_u64_8_u64_kernel:
+  // BB#0:
+  //Obtain the output location for this workitem
+  workitemabsid_u32 $s0, 0;
+  cvt_u64_u32 $d3, $s0;
+  shl_u64 $d3, $d3, 3; // byte offset: absolute id * 8 (u64 elements)
+  ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output];
+  add_u64 $d0, $d0, $d3; // $d0 = address of this work-item's output slot
+  //Load and sum the values
+  ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; // load asserts the same alignment the declaration requests
+  ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second];
+  add_u64 $d3, $d2, $d3;
+  st_global_u64 $d3, [$d0]; // output slot = arg_first + arg_second
+  // %return
+  ret;
+};
+
+/*
+ * Kernel name: __kernarg_8_u64_16_u64_kernel
+ */
+
+prog kernel &__kernarg_8_u64_16_u64_kernel(
+  kernarg_u64 %arg_output, // base address of the u64 output buffer
+  align(8) kernarg_u64 %arg_first, // first operand, declared with 8-byte alignment
+  align(16) kernarg_u64 %arg_second) { // second operand, declared with 16-byte alignment
+@__kernarg_8_u64_16_u64_kernel:
+  // BB#0:
+  //Obtain the output location for this workitem
+  workitemabsid_u32 $s0, 0;
+  cvt_u64_u32 $d3, $s0;
+  shl_u64 $d3, $d3, 3; // byte offset: absolute id * 8 (u64 elements)
+  ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output];
+  add_u64 $d0, $d0, $d3; // $d0 = address of this work-item's output slot
+  //Load and sum the values
+  ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first];
+  ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; // load asserts the 16-byte alignment requested in the declaration
+  add_u64 $d3, $d2, $d3;
+  st_global_u64 $d3, [$d0]; // output slot = arg_first + arg_second
+  // %return
+  ret;
+};
+
+/*
+ * Kernel name: __kernarg_8_u64_32_u64_kernel
+ */
+
+prog kernel &__kernarg_8_u64_32_u64_kernel(
+  kernarg_u64 %arg_output,
+  align(8) kernarg_u64 %arg_first,
+  align(32) kernarg_u64 %arg_second) {
+@__kernarg_8_u64_32_u64_kernel:
+  // BB#0:
+  //Obtain the output location for this workitem
+  workitemabsid_u32 $s0, 0;
+  cvt_u64_u32 $d3, $s0;
+  shl_u64 $d3, $d3, 3;
+  ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output];
+  add_u64 $d0, $d0, $d3;
+  //Load and sum the values
+  ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first];
+
ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u64_64_u64_kernel + */ + +prog kernel &__kernarg_8_u64_64_u64_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u64 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_8_u64_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u64_128_u64_kernel + */ + +prog kernel &__kernarg_8_u64_128_u64_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u64 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_8_u64_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u64_256_u64_kernel + */ + +prog kernel &__kernarg_8_u64_256_u64_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u64 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_8_u64_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + 
ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_8_u64_kernel + */ + +prog kernel &__kernarg_16_u64_8_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_16_u64_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_16_u64_kernel + */ + +prog kernel &__kernarg_16_u64_16_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_16_u64_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_32_u64_kernel + */ + +prog kernel &__kernarg_16_u64_32_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_16_u64_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, 
$d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_64_u64_kernel + */ + +prog kernel &__kernarg_16_u64_64_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_16_u64_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_128_u64_kernel + */ + +prog kernel &__kernarg_16_u64_128_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_16_u64_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_256_u64_kernel + */ + +prog kernel &__kernarg_16_u64_256_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_16_u64_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + 
ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_8_u64_kernel + */ + +prog kernel &__kernarg_32_u64_8_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_32_u64_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_16_u64_kernel + */ + +prog kernel &__kernarg_32_u64_16_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_32_u64_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_32_u64_kernel + */ + +prog kernel &__kernarg_32_u64_32_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_32_u64_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + 
cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_64_u64_kernel + */ + +prog kernel &__kernarg_32_u64_64_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_32_u64_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_128_u64_kernel + */ + +prog kernel &__kernarg_32_u64_128_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_32_u64_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_256_u64_kernel + */ + +prog kernel &__kernarg_32_u64_256_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_32_u64_256_u64_kernel: + // BB#0: + //Obtain the output 
location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_8_u64_kernel + */ + +prog kernel &__kernarg_64_u64_8_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_64_u64_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_16_u64_kernel + */ + +prog kernel &__kernarg_64_u64_16_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_64_u64_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_32_u64_kernel + */ + +prog kernel &__kernarg_64_u64_32_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(32) kernarg_u64 %arg_second) { 
+@__kernarg_64_u64_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_64_u64_kernel + */ + +prog kernel &__kernarg_64_u64_64_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_64_u64_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_128_u64_kernel + */ + +prog kernel &__kernarg_64_u64_128_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_64_u64_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_256_u64_kernel + */ + +prog kernel &__kernarg_64_u64_256_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 
%arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_64_u64_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_8_u64_kernel + */ + +prog kernel &__kernarg_128_u64_8_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_128_u64_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_16_u64_kernel + */ + +prog kernel &__kernarg_128_u64_16_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_128_u64_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_32_u64_kernel + */ + +prog kernel 
&__kernarg_128_u64_32_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_128_u64_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_64_u64_kernel + */ + +prog kernel &__kernarg_128_u64_64_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_128_u64_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_128_u64_kernel + */ + +prog kernel &__kernarg_128_u64_128_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_128_u64_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + 
* Kernel name: __kernarg_128_u64_256_u64_kernel + */ + +prog kernel &__kernarg_128_u64_256_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_128_u64_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_8_u64_kernel + */ + +prog kernel &__kernarg_256_u64_8_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_256_u64_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_16_u64_kernel + */ + +prog kernel &__kernarg_256_u64_16_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_256_u64_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, 
$d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_32_u64_kernel + */ + +prog kernel &__kernarg_256_u64_32_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_256_u64_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_64_u64_kernel + */ + +prog kernel &__kernarg_256_u64_64_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_256_u64_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_128_u64_kernel + */ + +prog kernel &__kernarg_256_u64_128_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_256_u64_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + 
ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_256_u64_kernel + */ + +prog kernel &__kernarg_256_u64_256_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_256_u64_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u64_8_u32_kernel + */ + +prog kernel &__kernarg_8_u64_8_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u64 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_8_u64_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u64_16_u32_kernel + */ + +prog kernel &__kernarg_8_u64_16_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u64 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_8_u64_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the 
values + ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u64_32_u32_kernel + */ + +prog kernel &__kernarg_8_u64_32_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u64 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_8_u64_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u64_64_u32_kernel + */ + +prog kernel &__kernarg_8_u64_64_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u64 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_8_u64_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u64_128_u32_kernel + */ + +prog kernel &__kernarg_8_u64_128_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u64 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_8_u64_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + 
ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u64_256_u32_kernel + */ + +prog kernel &__kernarg_8_u64_256_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u64 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_8_u64_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_8_u32_kernel + */ + +prog kernel &__kernarg_16_u64_8_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_16_u64_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_16_u32_kernel + */ + +prog kernel &__kernarg_16_u64_16_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_16_u64_16_u32_kernel: + // BB#0: + //Obtain the 
output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_32_u32_kernel + */ + +prog kernel &__kernarg_16_u64_32_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_16_u64_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_64_u32_kernel + */ + +prog kernel &__kernarg_16_u64_64_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_16_u64_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_128_u32_kernel + */ + +prog kernel &__kernarg_16_u64_128_u32_kernel( + kernarg_u64 %arg_output, + align(16) 
kernarg_u64 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_16_u64_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u64_256_u32_kernel + */ + +prog kernel &__kernarg_16_u64_256_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u64 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_16_u64_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_8_u32_kernel + */ + +prog kernel &__kernarg_32_u64_8_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_32_u64_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: 
__kernarg_32_u64_16_u32_kernel + */ + +prog kernel &__kernarg_32_u64_16_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_32_u64_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_32_u32_kernel + */ + +prog kernel &__kernarg_32_u64_32_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_32_u64_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_64_u32_kernel + */ + +prog kernel &__kernarg_32_u64_64_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_32_u64_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + 
cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_128_u32_kernel + */ + +prog kernel &__kernarg_32_u64_128_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_32_u64_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u64_256_u32_kernel + */ + +prog kernel &__kernarg_32_u64_256_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u64 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_32_u64_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_8_u32_kernel + */ + +prog kernel &__kernarg_64_u64_8_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_64_u64_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the 
values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_16_u32_kernel + */ + +prog kernel &__kernarg_64_u64_16_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_64_u64_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_32_u32_kernel + */ + +prog kernel &__kernarg_64_u64_32_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_64_u64_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_64_u32_kernel + */ + +prog kernel &__kernarg_64_u64_64_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_64_u64_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 
$d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_128_u32_kernel + */ + +prog kernel &__kernarg_64_u64_128_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_64_u64_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u64_256_u32_kernel + */ + +prog kernel &__kernarg_64_u64_256_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u64 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_64_u64_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_8_u32_kernel + */ + +prog kernel &__kernarg_128_u64_8_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(8) kernarg_u32 %arg_second) { 
+@__kernarg_128_u64_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_16_u32_kernel + */ + +prog kernel &__kernarg_128_u64_16_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_128_u64_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_32_u32_kernel + */ + +prog kernel &__kernarg_128_u64_32_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_128_u64_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_64_u32_kernel + */ + +prog kernel 
&__kernarg_128_u64_64_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_128_u64_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_128_u32_kernel + */ + +prog kernel &__kernarg_128_u64_128_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_128_u64_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u64_256_u32_kernel + */ + +prog kernel &__kernarg_128_u64_256_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u64 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_128_u64_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + 
add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_8_u32_kernel + */ + +prog kernel &__kernarg_256_u64_8_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_256_u64_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_16_u32_kernel + */ + +prog kernel &__kernarg_256_u64_16_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_256_u64_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_32_u32_kernel + */ + +prog kernel &__kernarg_256_u64_32_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_256_u64_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + 
ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_64_u32_kernel + */ + +prog kernel &__kernarg_256_u64_64_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_256_u64_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_128_u32_kernel + */ + +prog kernel &__kernarg_256_u64_128_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_256_u64_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u64_256_u32_kernel + */ + +prog kernel &__kernarg_256_u64_256_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u64 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_256_u64_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, 
$s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u64 $d2, [%arg_first]; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_8_u64_kernel + */ + +prog kernel &__kernarg_8_u32_8_u64_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_8_u32_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_16_u64_kernel + */ + +prog kernel &__kernarg_8_u32_16_u64_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_8_u32_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_32_u64_kernel + */ + +prog kernel &__kernarg_8_u32_32_u64_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_8_u32_32_u64_kernel: + // 
BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_64_u64_kernel + */ + +prog kernel &__kernarg_8_u32_64_u64_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_8_u32_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_128_u64_kernel + */ + +prog kernel &__kernarg_8_u32_128_u64_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_8_u32_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_256_u64_kernel + */ + +prog kernel &__kernarg_8_u32_256_u64_kernel( + kernarg_u64 %arg_output, + 
align(8) kernarg_u32 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_8_u32_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_8_u64_kernel + */ + +prog kernel &__kernarg_16_u32_8_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_16_u32_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_16_u64_kernel + */ + +prog kernel &__kernarg_16_u32_16_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_16_u32_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: 
__kernarg_16_u32_32_u64_kernel + */ + +prog kernel &__kernarg_16_u32_32_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_16_u32_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_64_u64_kernel + */ + +prog kernel &__kernarg_16_u32_64_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_16_u32_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_128_u64_kernel + */ + +prog kernel &__kernarg_16_u32_128_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_16_u32_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u64 
$d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_256_u64_kernel + */ + +prog kernel &__kernarg_16_u32_256_u64_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_16_u32_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_8_u64_kernel + */ + +prog kernel &__kernarg_32_u32_8_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_32_u32_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_16_u64_kernel + */ + +prog kernel &__kernarg_32_u32_16_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_32_u32_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + 
ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_32_u64_kernel + */ + +prog kernel &__kernarg_32_u32_32_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_32_u32_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_64_u64_kernel + */ + +prog kernel &__kernarg_32_u32_64_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_32_u32_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_128_u64_kernel + */ + +prog kernel &__kernarg_32_u32_128_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_32_u32_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, 
$d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_256_u64_kernel + */ + +prog kernel &__kernarg_32_u32_256_u64_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_32_u32_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_8_u64_kernel + */ + +prog kernel &__kernarg_64_u32_8_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_64_u32_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_16_u64_kernel + */ + +prog kernel &__kernarg_64_u32_16_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_64_u32_16_u64_kernel: + // BB#0: 
+ //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_32_u64_kernel + */ + +prog kernel &__kernarg_64_u32_32_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_64_u32_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_64_u64_kernel + */ + +prog kernel &__kernarg_64_u32_64_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_64_u32_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_128_u64_kernel + */ + +prog kernel &__kernarg_64_u32_128_u64_kernel( + kernarg_u64 %arg_output, + 
align(64) kernarg_u32 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_64_u32_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_256_u64_kernel + */ + +prog kernel &__kernarg_64_u32_256_u64_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_64_u32_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_8_u64_kernel + */ + +prog kernel &__kernarg_128_u32_8_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_128_u32_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * 
Kernel name: __kernarg_128_u32_16_u64_kernel + */ + +prog kernel &__kernarg_128_u32_16_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_128_u32_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_32_u64_kernel + */ + +prog kernel &__kernarg_128_u32_32_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_128_u32_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_64_u64_kernel + */ + +prog kernel &__kernarg_128_u32_64_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_128_u32_64_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + 
ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_128_u64_kernel + */ + +prog kernel &__kernarg_128_u32_128_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_128_u32_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_256_u64_kernel + */ + +prog kernel &__kernarg_128_u32_256_u64_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_128_u32_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_8_u64_kernel + */ + +prog kernel &__kernarg_256_u32_8_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(8) kernarg_u64 %arg_second) { +@__kernarg_256_u32_8_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, 
[%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_16_u64_kernel + */ + +prog kernel &__kernarg_256_u32_16_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(16) kernarg_u64 %arg_second) { +@__kernarg_256_u32_16_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_32_u64_kernel + */ + +prog kernel &__kernarg_256_u32_32_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(32) kernarg_u64 %arg_second) { +@__kernarg_256_u32_32_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_64_u64_kernel + */ + +prog kernel &__kernarg_256_u32_64_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(64) kernarg_u64 %arg_second) { +@__kernarg_256_u32_64_u64_kernel: + // BB#0: + //Obtain the output location for 
this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_128_u64_kernel + */ + +prog kernel &__kernarg_256_u32_128_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(128) kernarg_u64 %arg_second) { +@__kernarg_256_u32_128_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_256_u64_kernel + */ + +prog kernel &__kernarg_256_u32_256_u64_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(256) kernarg_u64 %arg_second) { +@__kernarg_256_u32_256_u64_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u64 $d3, [%arg_second]; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_8_u32_kernel + */ + +prog kernel &__kernarg_8_u32_8_u32_kernel( + kernarg_u64 %arg_output, + align(8) 
kernarg_u32 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_8_u32_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_16_u32_kernel + */ + +prog kernel &__kernarg_8_u32_16_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_8_u32_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_32_u32_kernel + */ + +prog kernel &__kernarg_8_u32_32_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_8_u32_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; 
+ // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_64_u32_kernel + */ + +prog kernel &__kernarg_8_u32_64_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_8_u32_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_128_u32_kernel + */ + +prog kernel &__kernarg_8_u32_128_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_8_u32_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_8_u32_256_u32_kernel + */ + +prog kernel &__kernarg_8_u32_256_u32_kernel( + kernarg_u64 %arg_output, + align(8) kernarg_u32 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_8_u32_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(8)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(8)_width(all)_u32 
$s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_8_u32_kernel + */ + +prog kernel &__kernarg_16_u32_8_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_16_u32_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_16_u32_kernel + */ + +prog kernel &__kernarg_16_u32_16_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_16_u32_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_32_u32_kernel + */ + +prog kernel &__kernarg_16_u32_32_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_16_u32_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + 
cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_64_u32_kernel + */ + +prog kernel &__kernarg_16_u32_64_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_16_u32_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_128_u32_kernel + */ + +prog kernel &__kernarg_16_u32_128_u32_kernel( + kernarg_u64 %arg_output, + align(16) kernarg_u32 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_16_u32_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_16_u32_256_u32_kernel + */ + +prog kernel &__kernarg_16_u32_256_u32_kernel( + kernarg_u64 %arg_output, + 
align(16) kernarg_u32 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_16_u32_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(16)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_8_u32_kernel + */ + +prog kernel &__kernarg_32_u32_8_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_32_u32_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_16_u32_kernel + */ + +prog kernel &__kernarg_32_u32_16_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_32_u32_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + 
st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_32_u32_kernel + */ + +prog kernel &__kernarg_32_u32_32_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_32_u32_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_64_u32_kernel + */ + +prog kernel &__kernarg_32_u32_64_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_32_u32_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_128_u32_kernel + */ + +prog kernel &__kernarg_32_u32_128_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_32_u32_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the 
values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_32_u32_256_u32_kernel + */ + +prog kernel &__kernarg_32_u32_256_u32_kernel( + kernarg_u64 %arg_output, + align(32) kernarg_u32 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_32_u32_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(32)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_8_u32_kernel + */ + +prog kernel &__kernarg_64_u32_8_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_64_u32_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_16_u32_kernel + */ + +prog kernel &__kernarg_64_u32_16_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_64_u32_16_u32_kernel: + // BB#0: + //Obtain the output location for this 
workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_32_u32_kernel + */ + +prog kernel &__kernarg_64_u32_32_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_64_u32_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_64_u32_kernel + */ + +prog kernel &__kernarg_64_u32_64_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_64_u32_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_128_u32_kernel + */ + +prog kernel 
&__kernarg_64_u32_128_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_64_u32_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_64_u32_256_u32_kernel + */ + +prog kernel &__kernarg_64_u32_256_u32_kernel( + kernarg_u64 %arg_output, + align(64) kernarg_u32 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_64_u32_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(64)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_8_u32_kernel + */ + +prog kernel &__kernarg_128_u32_8_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_128_u32_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + 
ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_16_u32_kernel + */ + +prog kernel &__kernarg_128_u32_16_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_128_u32_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_32_u32_kernel + */ + +prog kernel &__kernarg_128_u32_32_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_128_u32_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_64_u32_kernel + */ + +prog kernel &__kernarg_128_u32_64_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_128_u32_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, 
$d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_128_u32_kernel + */ + +prog kernel &__kernarg_128_u32_128_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_128_u32_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_128_u32_256_u32_kernel + */ + +prog kernel &__kernarg_128_u32_256_u32_kernel( + kernarg_u64 %arg_output, + align(128) kernarg_u32 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_128_u32_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(128)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_8_u32_kernel + */ + +prog kernel &__kernarg_256_u32_8_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 
%arg_first, + align(8) kernarg_u32 %arg_second) { +@__kernarg_256_u32_8_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(8)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_16_u32_kernel + */ + +prog kernel &__kernarg_256_u32_16_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(16) kernarg_u32 %arg_second) { +@__kernarg_256_u32_16_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(16)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_32_u32_kernel + */ + +prog kernel &__kernarg_256_u32_32_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(32) kernarg_u32 %arg_second) { +@__kernarg_256_u32_32_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(32)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + 
st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_64_u32_kernel + */ + +prog kernel &__kernarg_256_u32_64_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(64) kernarg_u32 %arg_second) { +@__kernarg_256_u32_64_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(64)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_128_u32_kernel + */ + +prog kernel &__kernarg_256_u32_128_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(128) kernarg_u32 %arg_second) { +@__kernarg_256_u32_128_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + //Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(128)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + +/* + * Kernel name: __kernarg_256_u32_256_u32_kernel + */ + +prog kernel &__kernarg_256_u32_256_u32_kernel( + kernarg_u64 %arg_output, + align(256) kernarg_u32 %arg_first, + align(256) kernarg_u32 %arg_second) { +@__kernarg_256_u32_256_u32_kernel: + // BB#0: + //Obtain the output location for this workitem + workitemabsid_u32 $s0, 0; + cvt_u64_u32 $d3, $s0; + shl_u64 $d3, $d3, 3; + ld_kernarg_align(256)_width(all)_u64 $d0, [%arg_output]; + add_u64 $d0, $d0, $d3; + 
//Load and sum the values + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_first]; + cvt_u64_u32 $d2, $s1; + ld_kernarg_align(256)_width(all)_u32 $s1, [%arg_second]; + cvt_u64_u32 $d3, $s1; + add_u64 $d3, $d2, $d3; + st_global_u64 $d3, [$d0]; + // %return + ret; +}; + diff --git a/src/kernels/kernel_dispatch.brig b/src/kernels/kernel_dispatch.brig new file mode 100644 index 0000000..6097bd6 Binary files /dev/null and b/src/kernels/kernel_dispatch.brig differ diff --git a/src/kernels/kernel_dispatch.hsail b/src/kernels/kernel_dispatch.hsail new file mode 100644 index 0000000..9b01593 --- /dev/null +++ b/src/kernels/kernel_dispatch.hsail @@ -0,0 +1,305 @@ +module &_tmp_snk_31210_updated_opt_bc:1:0:$full:$large:$default; +extension "amd:gcn"; +extension "IMAGE"; + +decl function &kernel_dispatch()( + arg_u64 %queue, + arg_u64 %aql, + arg_u64 %lparm_d); + +decl prog function &hsa_queue_load_write_index_relaxed(arg_u64 %ret)(arg_u64 %arg_p0); + +decl prog function &hsa_queue_store_write_index_relaxed()( + arg_u64 %arg_p0, + arg_u64 %arg_p1); + +decl prog function &hsa_signal_store_relaxed()( + arg_u64 %arg_p0, + arg_u64 %arg_p1); + +decl prog function &abort()(); + +prog kernel &__OpenCL_kernel1_kernel( + kernarg_u64 %__global_offset_0, + kernarg_u64 %__global_offset_1, + kernarg_u64 %__global_offset_2, + kernarg_u64 %__printf_buffer, + kernarg_u64 %__vqueue_pointer, + kernarg_u64 %__aqlwrap_pointer, + kernarg_u64 %queue, + kernarg_u64 %aql, + kernarg_u64 %lparm_d) +{ + pragma "AMD RTI", "ARGSTART:__OpenCL_kernel1_kernel"; + pragma "AMD RTI", "version:3:1:104"; + pragma "AMD RTI", "device:generic"; + pragma "AMD RTI", "uniqueid:1024"; + pragma "AMD RTI", "memory:private:0"; + pragma "AMD RTI", "memory:region:0"; + pragma "AMD RTI", "memory:local:0"; + pragma "AMD RTI", "value:__global_offset_0:u64:1:1:0"; + pragma "AMD RTI", "value:__global_offset_1:u64:1:1:16"; + pragma "AMD RTI", "value:__global_offset_2:u64:1:1:32"; + pragma "AMD RTI", 
"pointer:__printf_buffer:u8:1:1:48:uav:7:1:RW:0:0:0"; + pragma "AMD RTI", "value:__vqueue_pointer:u64:1:1:64"; + pragma "AMD RTI", "value:__aqlwrap_pointer:u64:1:1:80"; + pragma "AMD RTI", "pointer:queue:struct:1:1:96:uav:7:64:RW:0:0:0"; + pragma "AMD RTI", "pointer:aql:struct:1:1:112:uav:7:64:RW:0:0:0"; + pragma "AMD RTI", "pointer:lparm_d:struct:1:1:128:uav:7:128:RW:0:0:0"; + pragma "AMD RTI", "constarg:8:lparm_d"; + pragma "AMD RTI", "function:1:0"; + pragma "AMD RTI", "memory:64bitABI"; + pragma "AMD RTI", "privateid:8"; + pragma "AMD RTI", "enqueue_kernel:0"; + pragma "AMD RTI", "kernel_index:0"; + pragma "AMD RTI", "reflection:0:size_t"; + pragma "AMD RTI", "reflection:1:size_t"; + pragma "AMD RTI", "reflection:2:size_t"; + pragma "AMD RTI", "reflection:3:size_t"; + pragma "AMD RTI", "reflection:4:size_t"; + pragma "AMD RTI", "reflection:5:size_t"; + pragma "AMD RTI", "reflection:6:hsa_queue_t*"; + pragma "AMD RTI", "reflection:7:hsa_kernel_dispatch_packet_t*"; + pragma "AMD RTI", "reflection:8:snk_lparm_t*"; + pragma "AMD RTI", "ARGEND:__OpenCL_kernel1_kernel"; + +@__OpenCL_kernel1_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%queue]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%aql]; + ld_kernarg_align(8)_width(all)_u64 $d2, [%lparm_d]; + { + arg_u64 %queue; + arg_u64 %aql; + arg_u64 %lparm_d; + st_arg_align(8)_u64 $d0, [%queue]; + st_arg_align(8)_u64 $d1, [%aql]; + st_arg_align(8)_u64 $d2, [%lparm_d]; + call &kernel_dispatch () (%queue, %aql, %lparm_d); + } + ret; +}; + +function &kernel_dispatch()( + arg_u64 %queue, + arg_u64 %aql, + arg_u64 %lparm_d) +{ + align(8) private_u8 %__privateStack[8]; + +@kernel_dispatch_entry: + // BB#0: + ld_arg_align(8)_u64 $d0, [%queue]; + ld_global_align(4)_u32 $s0, [$d0+24]; + add_u32 $s0, $s0, 4294967295; + mov_b64 $d2, $d0; + { + arg_u64 %hsa_queue_load_write_index_relaxed; + arg_u64 %__param_p0; + st_arg_align(8)_u64 $d2, [%__param_p0]; + call &hsa_queue_load_write_index_relaxed 
(%hsa_queue_load_write_index_relaxed) (%__param_p0); + ld_arg_align(8)_u64 $d1, [%hsa_queue_load_write_index_relaxed]; + } + cvt_u64_u32 $d3, $s0; + and_b64 $d3, $d3, $d1; + shl_u64 $d3, $d3, 6; + ld_global_align(8)_u64 $d4, [$d0+8]; + add_u64 $d3, $d4, $d3; + ld_arg_align(8)_u64 $d4, [%lparm_d]; + ld_global_align(4)_u32 $s0, [$d4]; + ld_arg_align(8)_u64 $d5, [%aql]; + ld_align(2)_u16 $s1, [$d3+2]; + or_b32 $s0, $s1, $s0; + st_align(2)_u16 $s0, [$d3+2]; + ld_global_align(8)_u32 $s0, [$d4+8]; + st_align(4)_u32 $s0, [$d3+12]; + ld_global_align(8)_u16 $s0, [$d4+32]; + st_align(2)_u16 $s0, [$d3+4]; + ld_global_align(4)_u32 $s0, [$d4]; + cmp_lt_b1_s32 $c0, $s0, 2; + cbr_b1 $c0, @BB1_2; + // BB#1: + ld_global_align(8)_u32 $s0, [$d4+16]; + st_align(4)_u32 $s0, [$d3+16]; + ld_global_align(8)_u16 $s0, [$d4+40]; + st_align(2)_u16 $s0, [$d3+6]; + br @BB1_3; + +@BB1_2: + st_align(4)_u32 1, [$d3+16]; + st_align(2)_u16 1, [$d3+6]; + +@BB1_3: + ld_global_align(4)_u32 $s0, [$d4]; + cmp_lt_b1_s32 $c0, $s0, 3; + cbr_b1 $c0, @BB1_5; + // BB#4: + ld_global_align(8)_u32 $s0, [$d4+24]; + st_align(4)_u32 $s0, [$d3+20]; + ld_global_align(8)_u16 $s0, [$d4+48]; + st_align(2)_u16 $s0, [$d3+8]; + br @BB1_6; + +@BB1_5: + st_align(4)_u32 1, [$d3+20]; + st_align(2)_u16 1, [$d3+8]; + +@BB1_6: + ld_global_align(8)_u64 $d6, [$d5+40]; + st_align(8)_u64 $d6, [$d3+40]; + ld_global_align(8)_u64 $d6, [$d5+32]; + st_align(8)_u64 $d6, [$d3+32]; + ld_global_align(4)_u32 $s0, [$d5+24]; + st_align(4)_u32 $s0, [$d3+24]; + ld_global_align(4)_u32 $s0, [$d5+28]; + st_align(4)_u32 $s0, [$d3+28]; + ld_global_align(8)_u64 $d5, [$d5+56]; + st_align(8)_u64 $d5, [$d3+56]; + ld_global_align(4)_u32 $s0, [$d4+56]; + cmp_le_b1_s32 $c0, $s0, -1; + cbr_b1 $c0, @BB1_7; + // BB#8: + ld_align(2)_u16 $s0, [$d3]; + ld_global_align(4)_u32 $s1, [$d4+60]; + shl_u32 $s1, $s1, 8; + or_b32 $s0, $s0, $s1; + st_align(2)_u16 $s0, [$d3]; + br @BB1_9; + +@BB1_7: + // %._crit_edge + ld_align(2)_u16 $s0, [$d3]; + +@BB1_9: + and_b32 $s0, $s0, 
65535; + ld_global_align(4)_u32 $s1, [$d4+64]; + shl_u32 $s1, $s1, 9; + or_b32 $s0, $s0, $s1; + st_align(2)_u16 $s0, [$d3]; + add_u64 $d5, $d1, 1; + ld_global_align(4)_u32 $s1, [$d4+68]; + shl_u32 $s1, $s1, 11; + or_b32 $s0, $s0, $s1; + st_align(2)_u16 $s0, [$d3]; + st_u8 2, [$d3]; + { + arg_u64 %__param_p0; + arg_u64 %__param_p1; + st_arg_align(8)_u64 $d2, [%__param_p0]; + st_arg_align(8)_u64 $d5, [%__param_p1]; + call &hsa_queue_store_write_index_relaxed () (%__param_p0, %__param_p1); + } + lda_private_u32 $s0, [%__privateStack]; + cvt_u64_u32 $d2, $s0; + ld_global_align(8)_u64 $d0, [$d0+16]; + st_private_align(8)_u64 $d0, [%__privateStack]; + { + arg_u64 %__param_p0; + arg_u64 %__param_p1; + st_arg_align(8)_u64 $d2, [%__param_p0]; + st_arg_align(8)_u64 $d1, [%__param_p1]; + call &hsa_signal_store_relaxed () (%__param_p0, %__param_p1); + } + ret; +}; + +prog kernel &__OpenCL_kernel2_kernel( + kernarg_u64 %__global_offset_0, + kernarg_u64 %__global_offset_1, + kernarg_u64 %__global_offset_2, + kernarg_u64 %__printf_buffer, + kernarg_u64 %__vqueue_pointer, + kernarg_u64 %__aqlwrap_pointer, + kernarg_u64 %arg) +{ + pragma "AMD RTI", "ARGSTART:__OpenCL_kernel2_kernel"; + pragma "AMD RTI", "version:3:1:104"; + pragma "AMD RTI", "device:generic"; + pragma "AMD RTI", "uniqueid:1026"; + pragma "AMD RTI", "memory:private:0"; + pragma "AMD RTI", "memory:region:0"; + pragma "AMD RTI", "memory:local:0"; + pragma "AMD RTI", "value:__global_offset_0:u64:1:1:0"; + pragma "AMD RTI", "value:__global_offset_1:u64:1:1:16"; + pragma "AMD RTI", "value:__global_offset_2:u64:1:1:32"; + pragma "AMD RTI", "pointer:__printf_buffer:u8:1:1:48:uav:7:1:RW:0:0:0"; + pragma "AMD RTI", "value:__vqueue_pointer:u64:1:1:64"; + pragma "AMD RTI", "value:__aqlwrap_pointer:u64:1:1:80"; + pragma "AMD RTI", "pointer:arg:u32:1:1:96:uav:7:4:RW:0:0:0"; + pragma "AMD RTI", "function:1:0"; + pragma "AMD RTI", "memory:64bitABI"; + pragma "AMD RTI", "privateid:8"; + pragma "AMD RTI", "enqueue_kernel:0"; + 
pragma "AMD RTI", "kernel_index:1";
+    pragma "AMD RTI", "reflection:0:size_t";
+    pragma "AMD RTI", "reflection:1:size_t";
+    pragma "AMD RTI", "reflection:2:size_t";
+    pragma "AMD RTI", "reflection:3:size_t";
+    pragma "AMD RTI", "reflection:4:size_t";
+    pragma "AMD RTI", "reflection:5:size_t";
+    pragma "AMD RTI", "reflection:6:int*";
+    pragma "AMD RTI", "ARGEND:__OpenCL_kernel2_kernel";
+
+@__OpenCL_kernel2_kernel_entry:
+    // BB#0: read the pointer in %arg, add 4 to the u32 it addresses, write the sum back.
+    ld_kernarg_align(8)_width(all)_u64 $d0, [%arg];
+    ld_global_align(4)_const_width(all)_u32 $s0, [$d0]; // NOTE(review): const-qualified load of a location this kernel stores to below - confirm that is permitted
+    add_u32 $s0, $s0, 4;
+    st_global_align(4)_u32 $s0, [$d0];
+    ret;
+};
+
+prog function &hsa_queue_load_write_index_relaxed(arg_u64 %ret)(arg_u64 %arg_p0)
+{
+    // %arg_p0: queue address. %ret: write index produced by the queue instruction below.
+@hsa_queue_load_write_index_relaxed_entry:
+    ld_arg_align(8)_u64 $d0, [%arg_p0];
+    // d0 has the queue address
+    addqueuewriteindex_global_rlx_u64 $d1, [$d0], 1;
+    // d1 has the queue's write_index. Despite the function name this is an add of 1, not a plain load (presumably d1 is the pre-increment value - confirm).
+    st_arg_u64 $d1, [%ret];
+    // return write index
+    ret;
+};
+
+prog function &hsa_queue_store_write_index_relaxed()(
+    arg_u64 %arg_p0,
+    arg_u64 %arg_p1)
+{
+    // %arg_p0: queue address. %arg_p1: write index to store (relaxed).
+@hsa_queue_store_write_index_relaxed_entry:
+    ld_arg_align(8)_u64 $d0, [%arg_p0];
+    // d0 has the queue address
+    ld_arg_align(8)_u64 $d1, [%arg_p1];
+    // d1 has the write index
+    stqueuewriteindex_global_rlx_u64 [$d0], $d1;
+    // store write index d1 to queue d0
+    ret;
+};
+
+prog function &hsa_signal_store_relaxed()(
+    arg_u64 %arg_p0,
+    arg_u64 %arg_p1)
+{
+    // %arg_p0: signal handle. %arg_p1: value to store (relaxed, no result returned).
+@hsa_signal_store_relaxed_entry:
+    ld_arg_align(8)_u64 $d0, [%arg_p0];
+    // d0 has signal handle
+    ld_arg_align(8)_u64 $d1, [%arg_p1];
+    // d1 has the value
+    signalnoret_st_rlx_b64_sig64 $d0, $d1;
+    // store value d1 to signal handle d0
+    ret;
+};
+
+prog function &hsa_signal_load_relaxed(arg_u64 %ret)(arg_u64 %arg_p0)
+{
+    // %arg_p0: signal handle. %ret: value read from the signal (relaxed).
+@hsa_signal_load_relaxed_entry:
+    ld_arg_align(8)_u64 $d0, [%arg_p0];
+    // d0 has signal handle
+    signal_ld_rlx_b64_sig64 $d1, $d0;
+    // load value d1 from signal handle d0
+    st_arg_u64 $d1, [%ret];
+    ret;
+};
diff --git a/src/kernels/memory_ops.brig
b/src/kernels/memory_ops.brig new file mode 100644 index 0000000..7bdc893 Binary files /dev/null and b/src/kernels/memory_ops.brig differ diff --git a/src/kernels/memory_ops.hsail b/src/kernels/memory_ops.hsail new file mode 100644 index 0000000..3697be6 --- /dev/null +++ b/src/kernels/memory_ops.hsail @@ -0,0 +1,4514 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &memory_ops:1:0:$full:$large:$default; + +decl prog function &abort()(); + +/* + * Kernel name: __memory_atomic_ld_global_rlx_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_ld_global_rlx_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_ld_global_rlx_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + atomic_ld_global_rlx_agent_b32 $s0, [$d1]; + atomicnoret_st_global_rlx_agent_b32 [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_ld_global_rlx_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_ld_global_rlx_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_ld_global_rlx_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + atomic_ld_global_rlx_agent_b64 $d2, [$d1]; + atomicnoret_st_global_rlx_agent_b64 [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_ld_global_rlx_system_b32_kernel( + */ + +prog kernel &__memory_atomic_ld_global_rlx_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_ld_global_rlx_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + atomic_ld_global_rlx_system_b32 $s0, [$d1]; + atomicnoret_st_global_rlx_system_b32 [$d0], 
$s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_ld_global_rlx_system_b64_kernel( + */ + +prog kernel &__memory_atomic_ld_global_rlx_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_ld_global_rlx_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + atomic_ld_global_rlx_system_b64 $d2, [$d1]; + atomicnoret_st_global_rlx_system_b64 [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_ld_global_scacq_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_ld_global_scacq_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_ld_global_scacq_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + atomic_ld_global_scacq_agent_b32 $s0, [$d1]; + atomicnoret_st_global_rlx_agent_b32 [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_ld_global_scacq_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_ld_global_scacq_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_ld_global_scacq_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + atomic_ld_global_scacq_agent_b64 $d2, [$d1]; + atomicnoret_st_global_rlx_agent_b64 [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_ld_global_scacq_system_b32_kernel( + */ + +prog kernel &__memory_atomic_ld_global_scacq_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_ld_global_scacq_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + atomic_ld_global_scacq_system_b32 $s0, [$d1]; + atomicnoret_st_global_rlx_system_b32 [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: 
__memory_atomic_ld_global_scacq_system_b64_kernel(
+ */
+
+prog kernel &__memory_atomic_ld_global_scacq_system_b64_kernel(
+    kernarg_u64 %data,
+    kernarg_u64 %value) {
+    // Atomically loads the b64 at *%value (scacq, system scope) and writes it to *%data with a relaxed atomic store.
+@__memory_atomic_ld_global_scacq_system_b64_kernel_entry:
+    // BB#0:
+    ld_kernarg_align(8)_width(all)_u64 $d0, [%data];
+    ld_kernarg_align(8)_width(all)_u64 $d1, [%value];
+    // *%value is read only by the atomic load below; a plain ld_global into $d2 here would be dead (overwritten at once) and the sibling atomic_ld kernels have none.
+    atomic_ld_global_scacq_system_b64 $d2, [$d1];
+    atomicnoret_st_global_rlx_system_b64 [$d0], $d2;
+    // %return
+    ret;
+};
+
+/*
+ * Kernel name: __memory_atomic_and_global_rlx_agent_b32_kernel(
+ */
+
+prog kernel &__memory_atomic_and_global_rlx_agent_b32_kernel(
+    kernarg_u64 %data,
+    kernarg_u64 %value) {
+    // Reads the u32 operand at *%value, then atomic_and (rlx, agent) it into *%data; result register $s1 is not used.
+@__memory_atomic_and_global_rlx_agent_b32_kernel_entry:
+    // BB#0:
+    ld_kernarg_align(8)_width(all)_u64 $d0, [%data];
+    ld_kernarg_align(8)_width(all)_u64 $d1, [%value];
+    ld_global_u32 $s0, [$d1];
+    atomic_and_global_rlx_agent_b32 $s1, [$d0], $s0;
+    // %return
+    ret;
+};
+
+/*
+ * Kernel name: __memory_atomic_and_global_rlx_agent_b64_kernel(
+ */
+
+prog kernel &__memory_atomic_and_global_rlx_agent_b64_kernel(
+    kernarg_u64 %data,
+    kernarg_u64 %value) {
+    // Reads the u64 operand at *%value, then atomic_and (rlx, agent) it into *%data; result register $d1 is not used.
+@__memory_atomic_and_global_rlx_agent_b64_kernel_entry:
+    // BB#0:
+    ld_kernarg_align(8)_width(all)_u64 $d0, [%data];
+    ld_kernarg_align(8)_width(all)_u64 $d1, [%value];
+    ld_global_u64 $d2, [$d1];
+    atomic_and_global_rlx_agent_b64 $d1, [$d0], $d2;
+    // %return
+    ret;
+};
+
+/*
+ * Kernel name: __memory_atomic_and_global_rlx_system_b32_kernel(
+ */
+
+prog kernel &__memory_atomic_and_global_rlx_system_b32_kernel(
+    kernarg_u64 %data,
+    kernarg_u64 %value) {
+    // Reads the u32 operand at *%value, then atomic_and (rlx, system) it into *%data; result register $s1 is not used.
+@__memory_atomic_and_global_rlx_system_b32_kernel_entry:
+    // BB#0:
+    ld_kernarg_align(8)_width(all)_u64 $d0, [%data];
+    ld_kernarg_align(8)_width(all)_u64 $d1, [%value];
+    ld_global_u32 $s0, [$d1];
+    atomic_and_global_rlx_system_b32 $s1, [$d0], $s0;
+    // %return
+    ret;
+};
+
+/*
+ * Kernel name: __memory_atomic_and_global_rlx_system_b64_kernel(
+ */
+
+prog kernel
&__memory_atomic_and_global_rlx_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_rlx_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_and_global_rlx_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_scacq_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_and_global_scacq_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_scacq_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_and_global_scacq_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_scacq_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_and_global_scacq_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_scacq_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_and_global_scacq_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_scacq_system_b32_kernel( + */ + +prog kernel &__memory_atomic_and_global_scacq_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_scacq_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_and_global_scacq_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_scacq_system_b64_kernel( + */ + +prog kernel &__memory_atomic_and_global_scacq_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_and_global_scacq_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_and_global_scacq_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_screl_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_and_global_screl_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_screl_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_and_global_screl_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_screl_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_and_global_screl_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_screl_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_and_global_screl_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_screl_system_b32_kernel( + */ + +prog kernel &__memory_atomic_and_global_screl_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_screl_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_and_global_screl_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_screl_system_b64_kernel( + */ + +prog kernel &__memory_atomic_and_global_screl_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_screl_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 
$d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_and_global_screl_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_scar_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_and_global_scar_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_scar_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_and_global_scar_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_scar_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_and_global_scar_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_scar_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_and_global_scar_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_scar_system_b32_kernel( + */ + +prog kernel &__memory_atomic_and_global_scar_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_scar_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_and_global_scar_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_and_global_scar_system_b64_kernel( + */ + +prog kernel &__memory_atomic_and_global_scar_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_and_global_scar_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + 
atomic_and_global_scar_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_rlx_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_or_global_rlx_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_rlx_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_or_global_rlx_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_rlx_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_or_global_rlx_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_rlx_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_or_global_rlx_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_rlx_system_b32_kernel( + */ + +prog kernel &__memory_atomic_or_global_rlx_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_rlx_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_or_global_rlx_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_rlx_system_b64_kernel( + */ + +prog kernel &__memory_atomic_or_global_rlx_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_rlx_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_or_global_rlx_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_scacq_agent_b32_kernel( + */ + 
+prog kernel &__memory_atomic_or_global_scacq_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_scacq_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_or_global_scacq_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_scacq_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_or_global_scacq_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_scacq_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_or_global_scacq_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_scacq_system_b32_kernel( + */ + +prog kernel &__memory_atomic_or_global_scacq_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_scacq_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_or_global_scacq_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_scacq_system_b64_kernel( + */ + +prog kernel &__memory_atomic_or_global_scacq_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_scacq_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_or_global_scacq_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_screl_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_or_global_screl_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_or_global_screl_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_or_global_screl_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_screl_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_or_global_screl_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_screl_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_or_global_screl_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_screl_system_b32_kernel( + */ + +prog kernel &__memory_atomic_or_global_screl_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_screl_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_or_global_screl_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_screl_system_b64_kernel( + */ + +prog kernel &__memory_atomic_or_global_screl_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_screl_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_or_global_screl_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_scar_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_or_global_scar_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_scar_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + 
ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_or_global_scar_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_scar_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_or_global_scar_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_scar_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_or_global_scar_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_scar_system_b32_kernel( + */ + +prog kernel &__memory_atomic_or_global_scar_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_scar_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_or_global_scar_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_or_global_scar_system_b64_kernel( + */ + +prog kernel &__memory_atomic_or_global_scar_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_or_global_scar_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_or_global_scar_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_rlx_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_xor_global_rlx_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_rlx_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_xor_global_rlx_agent_b32 $s1, [$d0], $s0; + // 
%return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_rlx_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_xor_global_rlx_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_rlx_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_xor_global_rlx_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_rlx_system_b32_kernel( + */ + +prog kernel &__memory_atomic_xor_global_rlx_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_rlx_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_xor_global_rlx_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_rlx_system_b64_kernel( + */ + +prog kernel &__memory_atomic_xor_global_rlx_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_rlx_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_xor_global_rlx_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_scacq_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_xor_global_scacq_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_scacq_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_xor_global_scacq_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_scacq_agent_b64_kernel( + */ + +prog kernel 
&__memory_atomic_xor_global_scacq_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_scacq_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_xor_global_scacq_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_scacq_system_b32_kernel( + */ + +prog kernel &__memory_atomic_xor_global_scacq_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_scacq_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_xor_global_scacq_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_scacq_system_b64_kernel( + */ + +prog kernel &__memory_atomic_xor_global_scacq_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_scacq_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_xor_global_scacq_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_screl_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_xor_global_screl_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_screl_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_xor_global_screl_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_screl_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_xor_global_screl_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_xor_global_screl_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_xor_global_screl_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_screl_system_b32_kernel( + */ + +prog kernel &__memory_atomic_xor_global_screl_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_screl_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_xor_global_screl_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_screl_system_b64_kernel( + */ + +prog kernel &__memory_atomic_xor_global_screl_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_screl_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_xor_global_screl_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_scar_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_xor_global_scar_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_scar_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_xor_global_scar_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_scar_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_xor_global_scar_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_scar_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, 
[%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_xor_global_scar_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_scar_system_b32_kernel( + */ + +prog kernel &__memory_atomic_xor_global_scar_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_scar_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_xor_global_scar_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_xor_global_scar_system_b64_kernel( + */ + +prog kernel &__memory_atomic_xor_global_scar_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_xor_global_scar_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_xor_global_scar_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_rlx_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_exch_global_rlx_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_rlx_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_exch_global_rlx_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_rlx_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_exch_global_rlx_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_rlx_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_exch_global_rlx_agent_b64 
$d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_rlx_system_b32_kernel( + */ + +prog kernel &__memory_atomic_exch_global_rlx_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_rlx_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_exch_global_rlx_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_rlx_system_b64_kernel( + */ + +prog kernel &__memory_atomic_exch_global_rlx_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_rlx_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_exch_global_rlx_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_scacq_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_exch_global_scacq_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_scacq_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_exch_global_scacq_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_scacq_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_exch_global_scacq_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_scacq_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_exch_global_scacq_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: 
__memory_atomic_exch_global_scacq_system_b32_kernel( + */ + +prog kernel &__memory_atomic_exch_global_scacq_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_scacq_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_exch_global_scacq_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_scacq_system_b64_kernel( + */ + +prog kernel &__memory_atomic_exch_global_scacq_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_scacq_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_exch_global_scacq_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_screl_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_exch_global_screl_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_screl_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_exch_global_screl_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_screl_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_exch_global_screl_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_screl_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_exch_global_screl_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_screl_system_b32_kernel( + */ + +prog kernel 
&__memory_atomic_exch_global_screl_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_screl_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_exch_global_screl_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_screl_system_b64_kernel( + */ + +prog kernel &__memory_atomic_exch_global_screl_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_screl_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_exch_global_screl_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_scar_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_exch_global_scar_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_scar_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_exch_global_scar_agent_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_scar_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_exch_global_scar_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_scar_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_exch_global_scar_agent_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_scar_system_b32_kernel( + */ + +prog kernel &__memory_atomic_exch_global_scar_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_exch_global_scar_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_exch_global_scar_system_b32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_exch_global_scar_system_b64_kernel( + */ + +prog kernel &__memory_atomic_exch_global_scar_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_exch_global_scar_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_exch_global_scar_system_b64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_rlx_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_add_global_rlx_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_rlx_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_rlx_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_rlx_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_add_global_rlx_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_rlx_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_rlx_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_rlx_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_add_global_rlx_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_rlx_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + 
ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_rlx_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_rlx_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_add_global_rlx_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_rlx_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_rlx_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_rlx_system_u32_kernel( + */ + +prog kernel &__memory_atomic_add_global_rlx_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_rlx_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_rlx_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_rlx_system_u64_kernel( + */ + +prog kernel &__memory_atomic_add_global_rlx_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_rlx_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_rlx_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_rlx_system_s32_kernel( + */ + +prog kernel &__memory_atomic_add_global_rlx_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_rlx_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_rlx_system_s32 $s1, [$d0], $s0; + // 
%return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_rlx_system_s64_kernel( + */ + +prog kernel &__memory_atomic_add_global_rlx_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_rlx_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_rlx_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scacq_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_add_global_scacq_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scacq_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_scacq_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scacq_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_add_global_scacq_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scacq_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_scacq_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scacq_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_add_global_scacq_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scacq_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_scacq_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scacq_agent_s64_kernel( + */ + +prog kernel 
&__memory_atomic_add_global_scacq_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scacq_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_scacq_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scacq_system_u32_kernel( + */ + +prog kernel &__memory_atomic_add_global_scacq_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scacq_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_scacq_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scacq_system_u64_kernel( + */ + +prog kernel &__memory_atomic_add_global_scacq_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scacq_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_scacq_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scacq_system_s32_kernel( + */ + +prog kernel &__memory_atomic_add_global_scacq_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scacq_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_scacq_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scacq_system_s64_kernel( + */ + +prog kernel &__memory_atomic_add_global_scacq_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_add_global_scacq_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_scacq_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_screl_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_add_global_screl_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_screl_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_screl_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_screl_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_add_global_screl_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_screl_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_screl_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_screl_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_add_global_screl_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_screl_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_screl_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_screl_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_add_global_screl_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_screl_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, 
[%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_screl_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_screl_system_u32_kernel( + */ + +prog kernel &__memory_atomic_add_global_screl_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_screl_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_screl_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_screl_system_u64_kernel( + */ + +prog kernel &__memory_atomic_add_global_screl_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_screl_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_screl_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_screl_system_s32_kernel( + */ + +prog kernel &__memory_atomic_add_global_screl_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_screl_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_screl_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_screl_system_s64_kernel( + */ + +prog kernel &__memory_atomic_add_global_screl_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_screl_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + 
atomic_add_global_screl_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scar_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_add_global_scar_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scar_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_scar_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scar_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_add_global_scar_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scar_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_scar_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scar_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_add_global_scar_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scar_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_scar_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scar_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_add_global_scar_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scar_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_scar_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: 
__memory_atomic_add_global_scar_system_u32_kernel( + */ + +prog kernel &__memory_atomic_add_global_scar_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scar_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_scar_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scar_system_u64_kernel( + */ + +prog kernel &__memory_atomic_add_global_scar_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scar_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_scar_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scar_system_s32_kernel( + */ + +prog kernel &__memory_atomic_add_global_scar_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scar_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_add_global_scar_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_add_global_scar_system_s64_kernel( + */ + +prog kernel &__memory_atomic_add_global_scar_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_add_global_scar_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_add_global_scar_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_rlx_agent_u32_kernel( + */ + +prog kernel 
&__memory_atomic_sub_global_rlx_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_rlx_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_rlx_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_rlx_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_rlx_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_rlx_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_rlx_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_rlx_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_rlx_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_rlx_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_rlx_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_rlx_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_rlx_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_rlx_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_rlx_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_rlx_system_u32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_rlx_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_sub_global_rlx_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_rlx_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_rlx_system_u64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_rlx_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_rlx_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_rlx_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_rlx_system_s32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_rlx_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_rlx_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_rlx_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_rlx_system_s64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_rlx_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_rlx_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_rlx_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scacq_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scacq_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scacq_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + 
ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_scacq_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scacq_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scacq_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scacq_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_scacq_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scacq_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scacq_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scacq_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_scacq_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scacq_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scacq_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scacq_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_scacq_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scacq_system_u32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scacq_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scacq_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + 
atomic_sub_global_scacq_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scacq_system_u64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scacq_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scacq_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_scacq_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scacq_system_s32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scacq_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scacq_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_scacq_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scacq_system_s64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scacq_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scacq_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_scacq_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_screl_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_screl_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_screl_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_screl_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: 
__memory_atomic_sub_global_screl_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_screl_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_screl_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_screl_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_screl_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_screl_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_screl_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_screl_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_screl_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_screl_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_screl_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_screl_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_screl_system_u32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_screl_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_screl_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_screl_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_screl_system_u64_kernel( + */ + +prog kernel 
&__memory_atomic_sub_global_screl_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_screl_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_screl_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_screl_system_s32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_screl_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_screl_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_screl_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_screl_system_s64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_screl_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_screl_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_screl_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scar_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scar_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scar_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_scar_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scar_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scar_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_sub_global_scar_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_scar_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scar_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scar_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scar_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_scar_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scar_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scar_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scar_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_scar_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scar_system_u32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scar_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scar_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_scar_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scar_system_u64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scar_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scar_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + 
ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_scar_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scar_system_s32_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scar_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scar_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_sub_global_scar_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_sub_global_scar_system_s64_kernel( + */ + +prog kernel &__memory_atomic_sub_global_scar_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_sub_global_scar_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_sub_global_scar_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_rlx_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_rlx_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_rlx_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapinc_global_rlx_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_rlx_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_rlx_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_rlx_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + 
atomic_wrapinc_global_rlx_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_rlx_system_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_rlx_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_rlx_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapinc_global_rlx_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_rlx_system_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_rlx_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_rlx_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapinc_global_rlx_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_scacq_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_scacq_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_scacq_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapinc_global_scacq_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_scacq_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_scacq_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_scacq_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapinc_global_scacq_agent_u64 $d1, [$d0], $d2; + // 
%return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_scacq_system_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_scacq_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_scacq_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapinc_global_scacq_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_scacq_system_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_scacq_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_scacq_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapinc_global_scacq_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_screl_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_screl_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_screl_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapinc_global_screl_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_screl_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_screl_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_screl_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapinc_global_screl_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: 
__memory_atomic_wrapinc_global_screl_system_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_screl_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_screl_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapinc_global_screl_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_screl_system_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_screl_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_screl_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapinc_global_screl_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_scar_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_scar_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_scar_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapinc_global_scar_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_scar_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_scar_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_scar_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapinc_global_scar_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: 
__memory_atomic_wrapinc_global_scar_system_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_scar_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_scar_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapinc_global_scar_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapinc_global_scar_system_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapinc_global_scar_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapinc_global_scar_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapinc_global_scar_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_rlx_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_rlx_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_rlx_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapdec_global_rlx_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_rlx_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_rlx_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_rlx_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapdec_global_rlx_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_rlx_system_u32_kernel( + */ + +prog 
kernel &__memory_atomic_wrapdec_global_rlx_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_rlx_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapdec_global_rlx_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_rlx_system_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_rlx_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_rlx_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapdec_global_rlx_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_scacq_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_scacq_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_scacq_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapdec_global_scacq_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_scacq_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_scacq_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_scacq_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapdec_global_scacq_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_scacq_system_u32_kernel( + */ + +prog kernel 
&__memory_atomic_wrapdec_global_scacq_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_scacq_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapdec_global_scacq_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_scacq_system_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_scacq_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_scacq_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapdec_global_scacq_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_screl_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_screl_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_screl_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapdec_global_screl_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_screl_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_screl_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_screl_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapdec_global_screl_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_screl_system_u32_kernel( + */ + +prog kernel 
&__memory_atomic_wrapdec_global_screl_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_screl_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapdec_global_screl_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_screl_system_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_screl_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_screl_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapdec_global_screl_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_scar_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_scar_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_scar_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapdec_global_scar_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_scar_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_scar_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_scar_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapdec_global_scar_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_scar_system_u32_kernel( + */ + +prog kernel 
&__memory_atomic_wrapdec_global_scar_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_scar_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_wrapdec_global_scar_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_wrapdec_global_scar_system_u64_kernel( + */ + +prog kernel &__memory_atomic_wrapdec_global_scar_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_wrapdec_global_scar_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_wrapdec_global_scar_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_rlx_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_max_global_rlx_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_rlx_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_rlx_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_rlx_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_max_global_rlx_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_rlx_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_rlx_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_rlx_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_max_global_rlx_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_max_global_rlx_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_rlx_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_rlx_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_max_global_rlx_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_rlx_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_rlx_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_rlx_system_u32_kernel( + */ + +prog kernel &__memory_atomic_max_global_rlx_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_rlx_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_rlx_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_rlx_system_u64_kernel( + */ + +prog kernel &__memory_atomic_max_global_rlx_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_rlx_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_rlx_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_rlx_system_s32_kernel( + */ + +prog kernel &__memory_atomic_max_global_rlx_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_rlx_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + 
ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_rlx_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_rlx_system_s64_kernel( + */ + +prog kernel &__memory_atomic_max_global_rlx_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_rlx_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_rlx_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scacq_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_max_global_scacq_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scacq_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_scacq_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scacq_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_max_global_scacq_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scacq_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_scacq_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scacq_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_max_global_scacq_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scacq_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_scacq_agent_s32 $s1, 
[$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scacq_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_max_global_scacq_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scacq_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_scacq_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scacq_system_u32_kernel( + */ + +prog kernel &__memory_atomic_max_global_scacq_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scacq_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_scacq_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scacq_system_u64_kernel( + */ + +prog kernel &__memory_atomic_max_global_scacq_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scacq_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_scacq_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scacq_system_s32_kernel( + */ + +prog kernel &__memory_atomic_max_global_scacq_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scacq_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_scacq_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: 
__memory_atomic_max_global_scacq_system_s64_kernel( + */ + +prog kernel &__memory_atomic_max_global_scacq_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scacq_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_scacq_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_screl_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_max_global_screl_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_screl_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_screl_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_screl_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_max_global_screl_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_screl_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_screl_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_screl_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_max_global_screl_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_screl_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_screl_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_screl_agent_s64_kernel( + */ + +prog kernel 
&__memory_atomic_max_global_screl_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_screl_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_screl_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_screl_system_u32_kernel( + */ + +prog kernel &__memory_atomic_max_global_screl_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_screl_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_screl_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_screl_system_u64_kernel( + */ + +prog kernel &__memory_atomic_max_global_screl_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_screl_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_screl_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_screl_system_s32_kernel( + */ + +prog kernel &__memory_atomic_max_global_screl_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_screl_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_screl_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_screl_system_s64_kernel( + */ + +prog kernel &__memory_atomic_max_global_screl_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_max_global_screl_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_screl_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scar_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_max_global_scar_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scar_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_scar_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scar_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_max_global_scar_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scar_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_scar_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scar_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_max_global_scar_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scar_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_scar_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scar_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_max_global_scar_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scar_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + 
ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_scar_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scar_system_u32_kernel( + */ + +prog kernel &__memory_atomic_max_global_scar_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scar_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_scar_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scar_system_u64_kernel( + */ + +prog kernel &__memory_atomic_max_global_scar_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scar_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_scar_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scar_system_s32_kernel( + */ + +prog kernel &__memory_atomic_max_global_scar_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scar_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_max_global_scar_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_max_global_scar_system_s64_kernel( + */ + +prog kernel &__memory_atomic_max_global_scar_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_max_global_scar_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_max_global_scar_system_s64 
$d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_rlx_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_min_global_rlx_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_rlx_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_rlx_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_rlx_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_min_global_rlx_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_rlx_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_rlx_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_rlx_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_min_global_rlx_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_rlx_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_rlx_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_rlx_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_min_global_rlx_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_rlx_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_rlx_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_rlx_system_u32_kernel( + */ + +prog kernel 
&__memory_atomic_min_global_rlx_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_rlx_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_rlx_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_rlx_system_u64_kernel( + */ + +prog kernel &__memory_atomic_min_global_rlx_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_rlx_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_rlx_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_rlx_system_s32_kernel( + */ + +prog kernel &__memory_atomic_min_global_rlx_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_rlx_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_rlx_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_rlx_system_s64_kernel( + */ + +prog kernel &__memory_atomic_min_global_rlx_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_rlx_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_rlx_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scacq_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_min_global_scacq_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_min_global_scacq_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_scacq_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scacq_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_min_global_scacq_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scacq_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_scacq_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scacq_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_min_global_scacq_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scacq_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_scacq_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scacq_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_min_global_scacq_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scacq_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_scacq_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scacq_system_u32_kernel( + */ + +prog kernel &__memory_atomic_min_global_scacq_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scacq_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, 
[%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_scacq_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scacq_system_u64_kernel( + */ + +prog kernel &__memory_atomic_min_global_scacq_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scacq_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_scacq_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scacq_system_s32_kernel( + */ + +prog kernel &__memory_atomic_min_global_scacq_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scacq_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_scacq_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scacq_system_s64_kernel( + */ + +prog kernel &__memory_atomic_min_global_scacq_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scacq_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_scacq_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_screl_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_min_global_screl_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_screl_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + 
atomic_min_global_screl_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_screl_agent_u64_kernel( + */ + +prog kernel &__memory_atomic_min_global_screl_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_screl_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_screl_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_screl_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_min_global_screl_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_screl_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_screl_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_screl_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_min_global_screl_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_screl_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_screl_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_screl_system_u32_kernel( + */ + +prog kernel &__memory_atomic_min_global_screl_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_screl_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_screl_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: 
__memory_atomic_min_global_screl_system_u64_kernel( + */ + +prog kernel &__memory_atomic_min_global_screl_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_screl_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_screl_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_screl_system_s32_kernel( + */ + +prog kernel &__memory_atomic_min_global_screl_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_screl_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_screl_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_screl_system_s64_kernel( + */ + +prog kernel &__memory_atomic_min_global_screl_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_screl_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_screl_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scar_agent_u32_kernel( + */ + +prog kernel &__memory_atomic_min_global_scar_agent_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scar_agent_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_scar_agent_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scar_agent_u64_kernel( + */ + +prog kernel 
&__memory_atomic_min_global_scar_agent_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scar_agent_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_scar_agent_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scar_agent_s32_kernel( + */ + +prog kernel &__memory_atomic_min_global_scar_agent_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scar_agent_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_scar_agent_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scar_agent_s64_kernel( + */ + +prog kernel &__memory_atomic_min_global_scar_agent_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scar_agent_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_scar_agent_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scar_system_u32_kernel( + */ + +prog kernel &__memory_atomic_min_global_scar_system_u32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scar_system_u32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_scar_system_u32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scar_system_u64_kernel( + */ + +prog kernel &__memory_atomic_min_global_scar_system_u64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + 
+@__memory_atomic_min_global_scar_system_u64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_scar_system_u64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scar_system_s32_kernel( + */ + +prog kernel &__memory_atomic_min_global_scar_system_s32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scar_system_s32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_min_global_scar_system_s32 $s1, [$d0], $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_min_global_scar_system_s64_kernel( + */ + +prog kernel &__memory_atomic_min_global_scar_system_s64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_min_global_scar_system_s64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_min_global_scar_system_s64 $d1, [$d0], $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_rlx_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_cas_global_rlx_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_rlx_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_cas_global_rlx_agent_b32 $s1, [$d0], $s0, $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_rlx_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_cas_global_rlx_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_rlx_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + 
ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_cas_global_rlx_agent_b64 $d1, [$d0], $d2, $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_rlx_system_b32_kernel( + */ + +prog kernel &__memory_atomic_cas_global_rlx_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_rlx_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_cas_global_rlx_system_b32 $s1, [$d0], $s0, $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_rlx_system_b64_kernel( + */ + +prog kernel &__memory_atomic_cas_global_rlx_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_rlx_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_cas_global_rlx_system_b64 $d1, [$d0], $d2, $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_scacq_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_cas_global_scacq_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_scacq_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_cas_global_scacq_agent_b32 $s1, [$d0], $s0, $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_scacq_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_cas_global_scacq_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_scacq_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + 
atomic_cas_global_scacq_agent_b64 $d1, [$d0], $d2, $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_scacq_system_b32_kernel( + */ + +prog kernel &__memory_atomic_cas_global_scacq_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_scacq_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_cas_global_scacq_system_b32 $s1, [$d0], $s0, $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_scacq_system_b64_kernel( + */ + +prog kernel &__memory_atomic_cas_global_scacq_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_scacq_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_cas_global_scacq_system_b64 $d1, [$d0], $d2, $d2; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_screl_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_cas_global_screl_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_screl_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; + atomic_cas_global_screl_agent_b32 $s1, [$d0], $s0, $s0; + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_screl_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_cas_global_screl_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_screl_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; + atomic_cas_global_screl_agent_b64 $d1, [$d0], $d2, $d2; + // %return + ret; +}; + +/* + 
* Kernel name: __memory_atomic_cas_global_screl_system_b32_kernel( + */ + +prog kernel &__memory_atomic_cas_global_screl_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_screl_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; // 32-bit operand read through the %value pointer + atomic_cas_global_screl_system_b32 $s1, [$d0], $s0, $s0; // compare and swap values are the same operand; result in $s1 is unused + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_screl_system_b64_kernel( + */ + +prog kernel &__memory_atomic_cas_global_screl_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_screl_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; // 64-bit operand read through the %value pointer + atomic_cas_global_screl_system_b64 $d1, [$d0], $d2, $d2; // compare and swap values are the same operand; result in $d1 is unused + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_scar_agent_b32_kernel( + */ + +prog kernel &__memory_atomic_cas_global_scar_agent_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_scar_agent_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; // 32-bit operand read through the %value pointer + atomic_cas_global_scar_agent_b32 $s1, [$d0], $s0, $s0; // compare and swap values are the same operand; result in $s1 is unused + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_scar_agent_b64_kernel( + */ + +prog kernel &__memory_atomic_cas_global_scar_agent_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_scar_agent_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; // 64-bit operand read through the %value pointer + atomic_cas_global_scar_agent_b64 $d1, [$d0], $d2, $d2; // compare and swap values are the same operand; result in $d1 is unused + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_scar_system_b32_kernel( + */ + +prog kernel 
&__memory_atomic_cas_global_scar_system_b32_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_scar_system_b32_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u32 $s0, [$d1]; // 32-bit operand read through the %value pointer + atomic_cas_global_scar_system_b32 $s1, [$d0], $s0, $s0; // compare and swap values are the same operand; result in $s1 is unused + // %return + ret; +}; + +/* + * Kernel name: __memory_atomic_cas_global_scar_system_b64_kernel( + */ + +prog kernel &__memory_atomic_cas_global_scar_system_b64_kernel( + kernarg_u64 %data, + kernarg_u64 %value) { + +@__memory_atomic_cas_global_scar_system_b64_kernel_entry: + // BB#0: + ld_kernarg_align(8)_width(all)_u64 $d0, [%data]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%value]; + ld_global_u64 $d2, [$d1]; // 64-bit operand read through the %value pointer + atomic_cas_global_scar_system_b64 $d1, [$d0], $d2, $d2; // compare and swap values are the same operand; result in $d1 is unused + // %return + ret; +}; + diff --git a/src/kernels/mixed_scope.brig b/src/kernels/mixed_scope.brig new file mode 100644 index 0000000..c3adddf Binary files /dev/null and b/src/kernels/mixed_scope.brig differ diff --git a/src/kernels/mixed_scope.hsail b/src/kernels/mixed_scope.hsail new file mode 100644 index 0000000..2f09cb4 --- /dev/null +++ b/src/kernels/mixed_scope.hsail @@ -0,0 +1,84 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +module &mixed_scope:1:0:$full:$large:$default; + +decl prog function &abort()(); + +kernel &__vector_copy_kernel1( + kernarg_u64 %a, + kernarg_u64 %b) +{ // declared without the prog qualifier, unlike __vector_copy_kernel2 below +@__vector_copy_kernel_entry: + // BB#0: // %entry + workitemabsid_u32 $s0, 0; // absolute work-item id in dimension 0 + cvt_s64_s32 $d0, $s0; // widen the id to 64 bits + shl_u64 $d0, $d0, 2; // byte offset = id * 4 + ld_kernarg_align(8)_width(all)_u64 $d1, [%b]; + add_u64 $d1, $d1, $d0; // $d1 = %b + byte offset (destination) + ld_kernarg_align(8)_width(all)_u64 $d2, [%a]; + add_u64 $d0, $d2, $d0; // $d0 = %a + byte offset (source) + ld_global_u32 $s0, [$d0]; // read one 32-bit element from the source + st_global_u32 $s0, [$d1]; // write it to the destination + ret; +}; + +prog kernel &__vector_copy_kernel2( + kernarg_u64 %a, + kernarg_u64 %b) +{ // same body as __vector_copy_kernel1, declared with the prog qualifier +@__vector_copy_kernel_entry: + // BB#0: // %entry + workitemabsid_u32 $s0, 0; // absolute work-item id in dimension 0 + cvt_s64_s32 $d0, $s0; // widen the id to 64 bits + shl_u64 $d0, $d0, 2; // byte offset = id * 4 + ld_kernarg_align(8)_width(all)_u64 $d1, [%b]; + add_u64 $d1, $d1, $d0; // $d1 = %b + byte offset (destination) + ld_kernarg_align(8)_width(all)_u64 $d2, [%a]; + add_u64 $d0, $d2, $d0; // $d0 = %a + byte offset (source) + ld_global_u32 $s0, [$d0]; // read one 32-bit element from the source + st_global_u32 $s0, [$d1]; // write it to the destination + ret; +}; diff --git a/src/kernels/module_scope.brig b/src/kernels/module_scope.brig new file mode 100644 index 0000000..e110830 Binary files /dev/null and b/src/kernels/module_scope.brig differ diff --git a/src/kernels/module_scope.hsail b/src/kernels/module_scope.hsail new file mode 100644 index 0000000..bf1f466 --- /dev/null +++ b/src/kernels/module_scope.hsail @@ -0,0 +1,66 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +module &module_scope:1:0:$full:$large:$default; + +decl prog function &abort()(); + +kernel &__vector_copy_kernel( + kernarg_u64 %a, + kernarg_u64 %b) +{ // declared without the prog qualifier +@__vector_copy_kernel_entry: + // BB#0: // %entry + workitemabsid_u32 $s0, 0; // absolute work-item id in dimension 0 + cvt_s64_s32 $d0, $s0; // widen the id to 64 bits + shl_u64 $d0, $d0, 2; // byte offset = id * 4 + ld_kernarg_align(8)_width(all)_u64 $d1, [%b]; + add_u64 $d1, $d1, $d0; // $d1 = %b + byte offset (destination) + ld_kernarg_align(8)_width(all)_u64 $d2, [%a]; + add_u64 $d0, $d2, $d0; // $d0 = %a + byte offset (source) + ld_global_u32 $s0, [$d0]; // read one 32-bit element from the source + st_global_u32 $s0, [$d1]; // write it to the destination + ret; +}; diff --git a/src/kernels/no_op.brig b/src/kernels/no_op.brig new file mode 100644 index 0000000..9164007 Binary files /dev/null and b/src/kernels/no_op.brig differ diff --git a/src/kernels/no_op.hsail b/src/kernels/no_op.hsail new file mode 100644 index 0000000..74c7b58 --- /dev/null +++ b/src/kernels/no_op.hsail @@ -0,0 +1,67 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &no_op:1:0:$full:$large:$default; + +/* + * Kernels: no_op + * + * Description: The no_op kernel performs no work. + * + * Pseudo code: + * + * __kernel void no_op() { + * return; + * } + * + */ + +decl prog function &abort()(); + +prog kernel &__no_op_kernel() { +@__no_op_kernel_entry: + // BB#0: // %entry + ret; +}; diff --git a/src/kernels/no_op2.brig b/src/kernels/no_op2.brig new file mode 100644 index 0000000..fec6833 Binary files /dev/null and b/src/kernels/no_op2.brig differ diff --git a/src/kernels/no_op2.hsail b/src/kernels/no_op2.hsail new file mode 100644 index 0000000..e4846e7 --- /dev/null +++ b/src/kernels/no_op2.hsail @@ -0,0 +1,67 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &no_op2:1:0:$full:$large:$default; + +/* + * Kernels: no_op2 + * + * Description: The no_op2 kernel performs no work. 
+ * + * Pseudo code: + * + * __kernel void no_op2() { + * return; + * } + * + */ + +decl prog function &abort()(); + +prog kernel &__no_op2_kernel() { +@__no_op_kernel_entry: + // BB#0: // %entry + ret; +}; diff --git a/src/kernels/no_op_base_large.brig b/src/kernels/no_op_base_large.brig new file mode 100755 index 0000000..16e698a Binary files /dev/null and b/src/kernels/no_op_base_large.brig differ diff --git a/src/kernels/no_op_small.brig b/src/kernels/no_op_small.brig new file mode 100644 index 0000000..b75f8d5 Binary files /dev/null and b/src/kernels/no_op_small.brig differ diff --git a/src/kernels/no_op_small.hsail b/src/kernels/no_op_small.hsail new file mode 100644 index 0000000..160ad8a --- /dev/null +++ b/src/kernels/no_op_small.hsail @@ -0,0 +1,67 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &no_op_small:1:0:$full:$small:$default; + +/* + * Kernels: no_op_small + * + * Description: The no_op_small kernel performs no work ($small machine model). + * + * Pseudo code: + * + * __kernel void no_op_small() { + * return; + * } + * + */ + +decl prog function &abort()(); + +prog kernel &__no_op_small_kernel() { +@__no_op_kernel_entry: + // BB#0: // %entry + ret; +}; diff --git a/src/kernels/private_memory.brig b/src/kernels/private_memory.brig new file mode 100644 index 0000000..380a8a8 Binary files /dev/null and b/src/kernels/private_memory.brig differ diff --git a/src/kernels/private_memory.hsail b/src/kernels/private_memory.hsail new file mode 100644 index 0000000..7d8fae6 --- /dev/null +++ b/src/kernels/private_memory.hsail @@ -0,0 +1,108 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. 
+ * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &private_memory:1:0:$full:$large:$default; + +/* + * Kernels: private_memory_kernel + * + * Description: The purpose of the private memory kernel + * is to generate a BRIG file that requires private memory. 
+ * + * Pseudo code: + * + * __kernel void private_memory(__global uint *in, __global uint *out, __private uint count) { + * __private uint gid; + * __private uint lid; + * __private uint pvt[256]; + * + * gid = get_global_id(0); + * + * lid = get_local_id(0); + * + * if(lid > count) { + * return; + * } + * + * barrier(CLK_LOCAL_MEM_FENCE); + * + * out[gid] = in[gid]; + * + * return; + * } + * + * + */ + +decl prog function &abort()(); + +prog kernel &__private_memory_kernel( + kernarg_u64 %in, + kernarg_u64 %out, + kernarg_u32 %count) { + private_u32 %pvt[128]; // never referenced below; NOTE(review): the pseudo code above says 256 elements - confirm which is intended + +@__private_memory_kernel_entry: + // BB#0: // %entry + ld_kernarg_align(4)_width(all)_u32 $s0, [%count]; + workitemid_u32 $s1, 0; // work-item id within the work-group, dimension 0 + cmp_gt_b1_u32 $c0, $s1, $s0; // local id > count ? + cbr_b1 $c0, @BB0_2; // if so, skip the copy + // BB#1: // %if.end + ld_kernarg_align(8)_width(all)_u64 $d1, [%out]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%in]; + workitemabsid_u32 $s0, 0; // absolute work-item id in dimension 0 + cvt_u64_u32 $d2, $s0; + shl_u64 $d2, $d2, 2; // byte offset = id * 4 + add_u64 $d1, $d1, $d2; // $d1 = %out + byte offset + add_u64 $d0, $d0, $d2; // $d0 = %in + byte offset + barrier; + ld_global_u32 $s0, [$d0]; // read one 32-bit element from %in + st_global_u32 $s0, [$d1]; // write it to %out + +@BB0_2: + // %return + ret; +}; diff --git a/src/kernels/program_scope.brig b/src/kernels/program_scope.brig new file mode 100644 index 0000000..a29cedc Binary files /dev/null and b/src/kernels/program_scope.brig differ diff --git a/src/kernels/program_scope.hsail b/src/kernels/program_scope.hsail new file mode 100644 index 0000000..fedaa12 --- /dev/null +++ b/src/kernels/program_scope.hsail @@ -0,0 +1,66 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +module &program_scope:1:0:$full:$large:$default; + +decl prog function &abort()(); + +prog kernel &__vector_copy_kernel( + kernarg_u64 %a, + kernarg_u64 %b) +{ // declared with the prog qualifier +@__vector_copy_kernel_entry: + // BB#0: // %entry + workitemabsid_u32 $s0, 0; // absolute work-item id in dimension 0 + cvt_s64_s32 $d0, $s0; // widen the id to 64 bits + shl_u64 $d0, $d0, 2; // byte offset = id * 4 + ld_kernarg_align(8)_width(all)_u64 $d1, [%b]; + add_u64 $d1, $d1, $d0; // $d1 = %b + byte offset (destination) + ld_kernarg_align(8)_width(all)_u64 $d2, [%a]; + add_u64 $d0, $d2, $d0; // $d0 = %a + byte offset (source) + ld_global_u32 $s0, [$d0]; // read one 32-bit element from the source + st_global_u32 $s0, [$d1]; // write it to the destination + ret; +}; diff --git a/src/kernels/readonly_vector_copy.brig b/src/kernels/readonly_vector_copy.brig new file mode 100644 index 0000000..1b396a8 Binary files /dev/null and b/src/kernels/readonly_vector_copy.brig differ diff --git a/src/kernels/readonly_vector_copy.hsail b/src/kernels/readonly_vector_copy.hsail new file mode 100755 index 0000000..e76d61b --- /dev/null +++ b/src/kernels/readonly_vector_copy.hsail @@ -0,0 +1,67 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +module &readonly_vector_copy:1:0:$full:$large:$default; + +decl prog function &abort()(); + +decl prog readonly_u32 &a[]; // declaration only; &a is not defined in this module + +prog kernel &__readonly_vector_copy_kernel( + kernarg_u64 %b) { +@__readonly_vector_copy_kernel_entry: + // BB#0: // %entry + // Copy one 32-bit element of the readonly array &a to the buffer addressed by %b. + workitemabsid_u32 $s1, 0; // absolute work-item id in dimension 0 + cvt_s64_s32 $d0, $s1; // widen the id to 64 bits + shl_u64 $d1, $d0, 2; // byte offset = id * 4 + ld_readonly_u32 $s0, [&a][$d1]; // read the element of &a at that byte offset + + ld_kernarg_align(8)_width(all)_u64 $d2, [%b]; + add_u64 $d1, $d2, $d1; // $d1 = %b + byte offset (destination) + // disabled alternative using the unscaled id as offset: ld_readonly_u32 $s0, [&a][$d0]; + st_global_u32 $s0, [$d1]; // write the element to the destination + ret; +}; diff --git a/src/kernels/recursive_func.brig b/src/kernels/recursive_func.brig new file mode 100644 index 0000000..a22821a Binary files /dev/null and b/src/kernels/recursive_func.brig differ diff --git a/src/kernels/recursive_func.hsail b/src/kernels/recursive_func.hsail new file mode 100644 index 0000000..6a5a6a2 --- /dev/null +++ b/src/kernels/recursive_func.hsail @@ -0,0 +1,104 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +module &recursive_func:1:0:$full:$large:$default; + +decl prog function &abort()(); + +prog function &recur_add_fn(arg_u32 %out)(arg_u32 %in0, arg_u32 %in1) { + ld_arg_u32 $s0, [%in0]; //expect to be 0 at first call, in1 on last call + ld_arg_u32 $s1, [%in1]; //expect to be RECURSE_COUNT + // if in1 == in0, go to return + cmp_eq_b1_u32 $c1, $s1, $s0; + cbr_b1 $c1, @return; + + // otherwise, increase in0 by 1 + // call the function recursively with the updated in0 value + { + arg_u32 %outarg; + arg_u32 %arg0; + arg_u32 %arg1; + add_u32 $s0, $s0, 1; + //fill in the arguments + st_arg_u32 $s0, [%arg0]; + st_arg_u32 $s1, [%arg1]; + call &recur_add_fn (%outarg)(%arg0,%arg1); // NOTE(review): %outarg is never loaded after this call - confirm the nested result is meant to be ignored + } // store the value of s0 to out, then return + +@return: + st_arg_u32 $s0, [%out]; + ret; +}; + +prog kernel &__recursive_func_kernel( + kernarg_u64 %out, + kernarg_u32 %in0, + kernarg_u32 %in1 + ) { +@__recursive_func_kernel_entry: + // BB#0: // %entry + workitemabsid_u32 $s0, 0; // obtain the work_item absolute ID within the entire grid + cvt_u64_u32 $d0, $s0; // convert 32 bits to 64 bits, stored in $d0 + shl_u64 $d1, $d0, 2; // shift $d0 left by 2 bits (id * 4, the byte offset of a 32-bit element), store in $d1 + ld_kernarg_align(8)_width(all)_u64 $d0, [%out]; // load the base address of out to $d0 + add_u64 $d0, $d0, $d1; // calculate the address of the output element + ld_kernarg_align(4)_width(all)_u32 $s1, [%in0]; // load the value of in0 + ld_kernarg_align(4)_width(all)_u32 $s2, [%in1]; // load the value of in1 + + // start argument scope + { + arg_u32 %outarg; + arg_u32 %arg0; + arg_u32 %arg1; + // fill in the arguments + st_arg_u32 $s1, [%arg0]; + st_arg_u32 $s2, [%arg1]; + call &recur_add_fn(%outarg)(%arg0,%arg1); + ld_arg_align(4)_u32 $s0, [%outarg]; + } // end of argument scope + + st_global_align(4)_s32 $s0, [$d0]; // store the result to the out vector + ret; +}; diff --git a/src/kernels/signal_operations.brig b/src/kernels/signal_operations.brig new file mode 100644 index 0000000..d21a069 
Binary files /dev/null and b/src/kernels/signal_operations.brig differ diff --git a/src/kernels/signal_operations.hsail b/src/kernels/signal_operations.hsail new file mode 100644 index 0000000..4b6867e --- /dev/null +++ b/src/kernels/signal_operations.hsail @@ -0,0 +1,204 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +module &signal_operations:1:0:$full:$large:$default; + +decl prog function &abort()(); + +prog kernel &__signal_st_rlx_kernel( + kernarg_u32 %count, + kernarg_u64 %signal_handle, + kernarg_u64 %signal_value) { +@__signal_st_rlx_kernel_entry: + // BB#0: // %entry -- work-item gid stores signal_value[gid] into the signal signal_handle[gid] (st_rlx) + // If the gid is greater than count, just return. NOTE(review): cmp_gt lets gid == count through -- confirm count is the last valid index, not the element count. + workitemabsid_u32 $s0, 0; + ld_kernarg_align(4)_width(all)_s32 $s1, [%count]; + cmp_gt_b1_s32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_2; + + // BB#1: // %if.end + // Use the gid to select the appropriate signal handle and value. + ld_kernarg_align(8)_width(all)_u64 $d0, [%signal_value]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%signal_handle]; + cvt_s64_s32 $d2, $s0; + shl_u64 $d2, $d2, 3; // byte offset of element gid (8 bytes per element) + + // The $d1 register holds the address of the signal handle. + add_u64 $d1, $d1, $d2; + + // The $d0 register holds the address of the signal value. + add_u64 $d0, $d0, $d2; + + // The $d2 register is the signal handle. + ld_global_sig64 $d2, [$d1]; + + // The $d3 register is the desired signal value. + ld_global_s64 $d3, [$d0]; + + // Set the signal value. + signalnoret_st_rlx_b64_sig64 $d2, $d3; + +@BB0_2: + // %return + ret; +}; + +prog kernel &__signal_st_screl_kernel( + kernarg_u32 %count, + kernarg_u64 %signal_handle, + kernarg_u64 %signal_value) { +@__signal_st_screl_kernel_entry: + // BB#0: // %entry -- same as the st_rlx kernel above, but the signal store uses st_screl + // If the gid is greater than count, just return. NOTE(review): cmp_gt lets gid == count through -- confirm count is the last valid index, not the element count. + workitemabsid_u32 $s0, 0; + ld_kernarg_align(4)_width(all)_s32 $s1, [%count]; + cmp_gt_b1_s32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_2; + + // BB#1: // %if.end + // Use the gid to select the appropriate signal handle and value. 
+ ld_kernarg_align(8)_width(all)_u64 $d0, [%signal_value]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%signal_handle]; + cvt_s64_s32 $d2, $s0; + shl_u64 $d2, $d2, 3; + + // The $d1 register holds the address of the signal handle. + add_u64 $d1, $d1, $d2; + + // The $d0 register holds the address of the signal value. + add_u64 $d0, $d0, $d2; + + // The $d2 register is the signal handle. + ld_global_sig64 $d2, [$d1]; + + // The $d3 register is the desired signal value. + ld_global_s64 $d3, [$d0]; + + // Set the signal value. + signalnoret_st_screl_b64_sig64 $d2, $d3; + +@BB0_2: + // %return + ret; +}; + +prog kernel &__signal_wait_eq_rlx_kernel( + kernarg_u32 %count, + kernarg_u64 %signal_handle, + kernarg_u64 %compare_value) { +@__signal_wait_eq_rlx_kernel_entry: + // BB#0: // %entry -- work-item gid waits (wait_eq_rlx) on signal_handle[gid] against compare_value[gid] + // If the gid is greater than count, just return. NOTE(review): cmp_gt lets gid == count through -- confirm count is the last valid index, not the element count. + workitemabsid_u32 $s0, 0; + ld_kernarg_align(4)_width(all)_s32 $s1, [%count]; + cmp_gt_b1_s32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_2; + + // BB#1: // %if.end + // Use the gid to select the appropriate signal handle and value. + ld_kernarg_align(8)_width(all)_u64 $d0, [%compare_value]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%signal_handle]; + cvt_s64_s32 $d2, $s0; + shl_u64 $d2, $d2, 3; // byte offset of element gid (8 bytes per element) + + // The $d1 register holds the address of the signal handle. + add_u64 $d1, $d1, $d2; + + // The $d0 register holds the address of the compare value. + add_u64 $d0, $d0, $d2; + + // The $d2 register is the signal handle. + ld_global_sig64 $d2, [$d1]; + + // The $d3 register contains the compare value. + ld_global_s64 $d3, [$d0]; + + // Wait on the signal value (eq condition against $d3); the value written back to $d3 is not used afterwards. + signal_wait_eq_rlx_s64_sig64 $d3, $d2, $d3; + +@BB0_2: + // %return + ret; +}; + +prog kernel &__signal_wait_eq_scacq_kernel( + kernarg_u32 %count, + kernarg_u64 %signal_handle, + kernarg_u64 %compare_value) { +@__signal_wait_eq_scacq_kernel_entry: + // BB#0: // %entry -- same as the wait_eq_rlx kernel above, but the wait uses wait_eq_scacq + // If the gid is greater than count, just return. 
+ workitemabsid_u32 $s0, 0; + ld_kernarg_align(4)_width(all)_s32 $s1, [%count]; + cmp_gt_b1_s32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_2; + + // BB#1: // %if.end -- NOTE(review): the cmp_gt guard above lets gid == count reach this point; confirm count is the last valid index. + // Use the gid to select the appropriate signal handle and value. + ld_kernarg_align(8)_width(all)_u64 $d0, [%compare_value]; + ld_kernarg_align(8)_width(all)_u64 $d1, [%signal_handle]; + cvt_s64_s32 $d2, $s0; + shl_u64 $d2, $d2, 3; // byte offset of element gid (8 bytes per element) + + // The $d1 register holds the address of the signal handle. + add_u64 $d1, $d1, $d2; + + // The $d0 register holds the address of the compare value. + add_u64 $d0, $d0, $d2; + + // The $d2 register is the signal handle. + ld_global_sig64 $d2, [$d1]; + + // The $d3 register contains the compare value. + ld_global_s64 $d3, [$d0]; + + // Wait on the signal value (eq condition against $d3); the value written back to $d3 is not used afterwards. + signal_wait_eq_scacq_s64_sig64 $d3, $d2, $d3; + +@BB0_2: + // %return + ret; +}; diff --git a/src/kernels/vector_copy.brig b/src/kernels/vector_copy.brig new file mode 100644 index 0000000..5ac85e9 Binary files /dev/null and b/src/kernels/vector_copy.brig differ diff --git a/src/kernels/vector_copy.hsail b/src/kernels/vector_copy.hsail new file mode 100644 index 0000000..25374e3 --- /dev/null +++ b/src/kernels/vector_copy.hsail @@ -0,0 +1,65 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +module &vector_copy:1:0:$full:$large:$default; + +decl prog function &abort()(); + +prog kernel &__vector_copy_kernel( + kernarg_u64 %a, + kernarg_u64 %b) { +@__vector_copy_kernel_entry: + // BB#0: // %entry -- each work-item copies one 32-bit element: b[gid] = a[gid] + workitemabsid_u32 $s0, 0; // gid: absolute work-item id in dimension 0 + cvt_s64_s32 $d0, $s0; // widen gid to 64 bits; NOTE(review): this sign-extends an unsigned id (cvt_u64_u32 would zero-extend) -- confirm grids stay below 2^31 work-items + shl_u64 $d0, $d0, 2; // byte offset = gid * 4 + ld_kernarg_align(8)_width(all)_u64 $d1, [%b]; // base address of the destination + add_u64 $d1, $d1, $d0; // $d1 = address of b[gid] + ld_kernarg_align(8)_width(all)_u64 $d2, [%a]; // base address of the source + add_u64 $d0, $d2, $d0; // $d0 = address of a[gid] (the offset in $d0 is overwritten here) + ld_global_u32 $s0, [$d0]; // read a[gid] + st_global_u32 $s0, [$d1]; // write it to b[gid] + ret; +}; diff --git a/src/kernels/verify_image_region.brig b/src/kernels/verify_image_region.brig new file mode 100644 index 0000000..10dd881 Binary files /dev/null and b/src/kernels/verify_image_region.brig differ diff --git a/src/kernels/verify_image_region.hsail b/src/kernels/verify_image_region.hsail new file mode 100644 index 0000000..76e5239 --- /dev/null +++ b/src/kernels/verify_image_region.hsail @@ -0,0 +1,2705 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +module &verify_image_region:1:0:$full:$large:$default; + +/* + * verify_image_region + */ + +extension "IMAGE"; + +decl prog function &abort()(); + +prog kernel &__verify_image_region_kernel_f32_1d( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_f32_1d_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Calculate the floating point error tolerance + //based on the data type bits value + ld_kernarg_align(8)_width(all)_u64 $d1, [%bits]; + ld_global_u32 $s1, [$d1]; + //The scale is 2 to the power of bits (shift 1 to the left + //bit times) + shl_u32 $s2, 0x1, $s1; + cvt_f32_u32 $s1, $s2; + //The floating point tolerance is abs(0.6/float(scale)) + div_f32 $s15, 0.6f, $s1; + + //Load the cmp_mask, start_region, end_region and bits values + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_global_u32 $s1, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_global_u32 $s2, [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s2; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_f32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the 
pixel component values + ldimage_v4_1d_f32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, $s0; + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + sub_near_f32 $s1, $s1, $s5; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + sub_near_f32 $s1, $s2, $s6; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + sub_near_f32 $s1, $s3, $s7; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + sub_near_f32 $s1, $s4, $s8; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha output error field. 
+ or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_f32_1db( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_f32_1db_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Calculate the floating point error tolerance + //based on the data type bits value + ld_kernarg_align(8)_width(all)_u64 $d1, [%bits]; + ld_global_u32 $s1, [$d1]; + //The scale is 2 to the power of bits (shift 1 to the left + //bit times) + shl_u32 $s2, 0x1, $s1; + cvt_f32_u32 $s1, $s2; + //The floating point tolerance is abs(0.6/float(scale)) + div_f32 $s15, 0.6f, $s1; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_global_u32 $s1, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_global_u32 $s2, [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s2; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_f32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_1db_f32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, $s0; + + //Check the red 
channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + sub_near_f32 $s1, $s1, $s5; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + sub_near_f32 $s1, $s2, $s6; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + sub_near_f32 $s1, $s3, $s7; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + sub_near_f32 $s1, $s4, $s8; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha output error field. 
+ or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_f32_1da( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_f32_1da_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Calculate the floating point error tolerance + //based on the data type bits value + ld_kernarg_align(8)_width(all)_u64 $d1, [%bits]; + ld_global_u32 $s1, [$d1]; + //The scale is 2 to the power of bits (shift 1 to the left + //bit times) + shl_u32 $s2, 0x1, $s1; + cvt_f32_u32 $s1, $s2; + //The floating point tolerance is abs(0.6/float(scale)) + div_f32 $s15, 0.6f, $s1; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v2_global_u32 ($s1, $s2), [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v2_global_u32 ($s3, $s4), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s3; + cbr_b1 $c0, @BB0_1; + + //This check the array index value + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s4; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + + //Specify the comparison component values from 
bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_f32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_1da_f32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9); + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + sub_near_f32 $s1, $s1, $s5; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + sub_near_f32 $s1, $s2, $s6; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + sub_near_f32 $s1, $s3, $s7; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + sub_near_f32 $s1, $s4, $s8; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha component error field + or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_f32_2d( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + 
kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_f32_2d_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Calculate the floating point error tolerance + //based on the data type bits value + ld_kernarg_align(8)_width(all)_u64 $d1, [%bits]; + ld_global_u32 $s1, [$d1]; + //The scale is 2 to the power of bits (shift 1 to the left + //bit times) + shl_u32 $s2, 0x1, $s1; + cvt_f32_u32 $s1, $s2; + //The floating point tolerance is abs(0.6/float(scale)) + div_f32 $s15, 0.6f, $s1; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v2_global_u32 ($s1, $s2), [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v2_global_u32 ($s3, $s4), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s3; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s4; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_f32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + 
ldimage_v4_2d_f32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9); + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + sub_near_f32 $s1, $s1, $s5; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + sub_near_f32 $s1, $s2, $s6; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + sub_near_f32 $s1, $s3, $s7; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + sub_near_f32 $s1, $s4, $s8; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha component error field + or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_f32_2ddepth( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_f32_2ddepth_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + 
ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Calculate the floating point error tolerance + //based on the data type bits value + ld_kernarg_align(8)_width(all)_u64 $d1, [%bits]; + ld_global_u32 $s1, [$d1]; + //The scale is 2 to the power of bits (shift 1 to the left + //bit times) + shl_u32 $s2, 0x1, $s1; + cvt_f32_u32 $s1, $s2; + //The floating point tolerance is abs(0.6/float(scale)) + div_f32 $s15, 0.6f, $s1; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v2_global_u32 ($s1, $s2), [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v2_global_u32 ($s3, $s4), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s3; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s4; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component value from the selected location + ld_global_f32 $s5, [$d1]; + + //Load the pixel component values + ldimage_2ddepth_f32_rwimg_u32 $s1, $d2, ($s0, $s9); + + //Check the depth channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the depth image component with the loaded value + sub_near_f32 $s1, $s1, $s5; + 
abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_3; + //Set the bit in the depth component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_f32_2da( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_f32_2da_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ret; // NOTE(review): unconditional return -- every instruction below in this kernel is unreachable, so nothing is ever stored to %error; confirm this kernel is intentionally disabled + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; // clears the error mask ($s14 = 0) + + //Calculate the floating point error tolerance + //based on the data type bits value + ld_kernarg_align(8)_width(all)_u64 $d1, [%bits]; + ld_global_u32 $s1, [$d1]; + //The scale is 2 to the power of bits (shift 1 to the left + //bit times) + shl_u32 $s2, 0x1, $s1; + cvt_f32_u32 $s1, $s2; + //The floating point tolerance is abs(0.6/float(scale)) + div_f32 $s15, 0.6f, $s1; + + //Load the cmp_mask value, then the start_region and end_region coordinates (three u32 each) + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v3_global_u32 ($s1,$s2,$s3), [$d1]; + + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v3_global_u32 ($s4,$s5,$s6), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s4; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s5; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s10, 2; + //If coord < start_region.z the coord is out of the region + 
cmp_lt_b1_u32 $c0, $s10, $s3; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.z the coord is out of the region + cmp_gt_b1_u32 $c0, $s10, $s6; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_f32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values at coordinates ($s0, $s9, $s10) = work-item ids in dimensions 0, 1, 2 + ldimage_v4_2da_f32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9, $s10); + + //Check the red channel cmp_mask (the channel is skipped when bit 0x1000 is not set in cmp_mask) + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value: no error when |pixel - expected| < tolerance + sub_near_f32 $s1, $s1, $s5; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + sub_near_f32 $s1, $s2, $s6; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + sub_near_f32 $s1, $s3, $s7; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + 
sub_near_f32 $s1, $s4, $s8; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha component error field + or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; // NOTE(review): plain store of this work-item's mask to the single %error address (no per-work-item offset, no atomic or) -- a zero mask from another work-item could overwrite a non-zero one; confirm + + ret; +}; + +prog kernel &__verify_image_region_kernel_f32_2dadepth( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_f32_2dadepth_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ret; // NOTE(review): unconditional return -- every instruction below in this kernel is unreachable, so nothing is ever stored to %error; confirm this kernel is intentionally disabled + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; // clears the error mask ($s14 = 0) + + //Calculate the floating point error tolerance + //based on the data type bits value + ld_kernarg_align(8)_width(all)_u64 $d1, [%bits]; + ld_global_u32 $s1, [$d1]; + //The scale is 2 to the power of bits (shift 1 to the left + //bit times) + shl_u32 $s2, 0x1, $s1; + cvt_f32_u32 $s1, $s2; + //The floating point tolerance is abs(0.6/float(scale)) + div_f32 $s15, 0.6f, $s1; + + //Load the cmp_mask value, then the start_region and end_region coordinates (three u32 each) + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v3_global_u32 ($s1,$s2,$s3), [$d1]; + + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v3_global_u32 ($s4,$s5,$s6), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s4; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s5; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s10, 2; + //If coord < start_region.z 
the coord is out of the region + cmp_lt_b1_u32 $c0, $s10, $s3; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.z the coord is out of the region + cmp_gt_b1_u32 $c0, $s10, $s6; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the single comparison component value (depth) from the selected location + ld_global_f32 $s5, [$d1]; + + //Load the pixel component value at coordinates ($s0, $s9, $s10) = work-item ids in dimensions 0, 1, 2 + ldimage_2dadepth_f32_rwimg_u32 $s1, $d2, ($s0, $s9, $s10); + + //Check the depth channel cmp_mask (the channel is skipped when bit 0x1000 is not set in cmp_mask) + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the depth image component with the loaded value: no error when |pixel - expected| < tolerance + sub_near_f32 $s1, $s1, $s5; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_3; + //Set the bit in the depth component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + st_global_u32 $s14, [$d0]; // NOTE(review): plain store of this work-item's mask to the single %error address (no per-work-item offset, no atomic or) -- a zero mask from another work-item could overwrite a non-zero one; confirm + + ret; +}; + +prog kernel &__verify_image_region_kernel_f32_3d( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_f32_3d_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ret; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Calculate the floating point error tolerance + //based on the data type bits value + ld_kernarg_align(8)_width(all)_u64 $d1, [%bits]; + ld_global_u32 $s1, [$d1]; + //The scale is 2 to the power of bits (shift 1 to the left + //bit times) + shl_u32 $s2, 0x1, $s1; + cvt_f32_u32 $s1, $s2; + //The floating point tolerance is abs(0.6/float(scale)) + div_f32 $s15, 0.6f, $s1; + + //Load the cmp_mask value + 
ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v3_global_u32 ($s1,$s2,$s3), [$d1]; + + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v3_global_u32 ($s4,$s5,$s6), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s4; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s5; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s10, 2; + //If coord < start_region.z the coord is out of the region + cmp_lt_b1_u32 $c0, $s10, $s3; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.z the coord is out of the region + cmp_gt_b1_u32 $c0, $s10, $s6; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_f32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_3d_f32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9, $s10); + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + sub_near_f32 $s1, $s1, $s5; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; 
+ cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + sub_near_f32 $s1, $s2, $s6; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + sub_near_f32 $s1, $s3, $s7; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + sub_near_f32 $s1, $s4, $s8; + abs_f32 $s1, $s1; + cmp_lt_b1_f32 $c0, $s1, $s15; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha component error field + or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_u32_1d( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_u32_1d_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_global_u32 $s1, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_global_u32 $s2, [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; 
+ cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s2; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_u32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_1d_u32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, $s0; + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + cmp_eq_b1_u32 $c0, $s1, $s5; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + cmp_eq_b1_u32 $c0, $s2, $s6; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + cmp_eq_b1_u32 $c0, $s3, $s7; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + cmp_eq_b1_u32 $c0, $s4, $s8; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha output error field. 
+ or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_u32_1db( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_u32_1db_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_global_u32 $s1, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_global_u32 $s2, [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s2; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_u32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_1db_u32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, $s0; + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + cmp_eq_b1_u32 $c0, $s1, $s5; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + 
cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + cmp_eq_b1_u32 $c0, $s2, $s6; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + cmp_eq_b1_u32 $c0, $s3, $s7; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + cmp_eq_b1_u32 $c0, $s4, $s8; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha output error field. + or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_u32_1da( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_u32_1da_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v2_global_u32 ($s1, $s2), [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v2_global_u32 ($s3, $s4), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s3; + cbr_b1 $c0, @BB0_1; 
+ + //This check the array index value + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s4; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_u32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_1da_u32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9); + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + cmp_eq_b1_u32 $c0, $s1, $s5; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + cmp_eq_b1_u32 $c0, $s2, $s6; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + cmp_eq_b1_u32 $c0, $s3, $s7; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + cmp_eq_b1_u32 $c0, $s4, $s8; + cbr_b1 
$c0, @BB0_6; + //Set the bit in the alpha component error field + or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_u32_2d( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_u32_2d_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v2_global_u32 ($s1, $s2), [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v2_global_u32 ($s3, $s4), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s3; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s4; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_u32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_2d_u32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9); + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 
0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + cmp_eq_b1_u32 $c0, $s1, $s5; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + cmp_eq_b1_u32 $c0, $s2, $s6; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + cmp_eq_b1_u32 $c0, $s3, $s7; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + cmp_eq_b1_u32 $c0, $s4, $s8; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha component error field + or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_u32_2ddepth( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_u32_2ddepth_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v2_global_u32 ($s1, $s2), [$d1]; + 
ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v2_global_u32 ($s3, $s4), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s3; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s4; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component value from the selected location + ld_global_u32 $s5, [$d1]; + + //Load the pixel component values + ldimage_2ddepth_u32_rwimg_u32 $s1, $d2, ($s0, $s9); + + //Check the depth channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the depth image component with the loaded value + cmp_eq_b1_u32 $c0, $s1, $s5; + cbr_b1 $c0, @BB0_3; + //Set the bit in the depth component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_u32_2da( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_u32_2da_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ret; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 
$d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v3_global_u32 ($s1,$s2,$s3), [$d1]; + + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v3_global_u32 ($s4,$s5,$s6), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s4; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s5; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s10, 2; + //If coord < start_region.z the coord is out of the region + cmp_lt_b1_u32 $c0, $s10, $s3; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.z the coord is out of the region + cmp_gt_b1_u32 $c0, $s10, $s6; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_u32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_2da_u32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9, $s10); + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + cmp_eq_b1_u32 $c0, $s1, $s5; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image 
component with the loaded value + cmp_eq_b1_u32 $c0, $s2, $s6; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + cmp_eq_b1_u32 $c0, $s3, $s7; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + cmp_eq_b1_u32 $c0, $s4, $s8; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha component error field + or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_u32_2dadepth( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_u32_2dadepth_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ret; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v3_global_u32 ($s1,$s2,$s3), [$d1]; + + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v3_global_u32 ($s4,$s5,$s6), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s4; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < 
start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s5; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s10, 2; + //If coord < start_region.z the coord is out of the region + cmp_lt_b1_u32 $c0, $s10, $s3; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.z the coord is out of the region + cmp_gt_b1_u32 $c0, $s10, $s6; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_global_u32 $s5, [$d1]; + + //Load the pixel component values + ldimage_2dadepth_u32_rwimg_u32 $s1, $d2, ($s0, $s9, $s10); + + //Check the depth channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the depth image component with the loaded value + cmp_eq_b1_u32 $c0, $s1, $s5; + cbr_b1 $c0, @BB0_3; + //Set the bit in the depth component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_u32_3d( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_u32_3d_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ret; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_v3_global_u32 
($s1,$s2,$s3), [$d1]; + + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_v3_global_u32 ($s4,$s5,$s6), [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s4; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s9, 1; + //If coord < start_region.y the coord is out of the region + cmp_lt_b1_u32 $c0, $s9, $s2; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.y the coord is out of the region + cmp_gt_b1_u32 $c0, $s9, $s5; + cbr_b1 $c0, @BB0_1; + + workitemabsid_u32 $s10, 2; + //If coord < start_region.z the coord is out of the region + cmp_lt_b1_u32 $c0, $s10, $s3; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.z the coord is out of the region + cmp_gt_b1_u32 $c0, $s10, $s6; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_u32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_3d_u32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9, $s10); + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + cmp_eq_b1_u32 $c0, $s1, $s5; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + cmp_eq_b1_u32 $c0, $s2, $s6; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component 
error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + cmp_eq_b1_u32 $c0, $s3, $s7; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + cmp_eq_b1_u32 $c0, $s4, $s8; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha component error field + or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +}; + +prog kernel &__verify_image_region_kernel_s32_1d( + kernarg_rwimg %image, + kernarg_u64 %rgn_values, + kernarg_u64 %bkg_values, + kernarg_u64 %start_region, + kernarg_u64 %end_region, + kernarg_u64 %bits, + kernarg_u64 %cmp_mask, + kernarg_u64 %error) +{ +@__verify_image_region_kernel_s32_1d_entry: + // BB#0: // %entry + mov_b64 $d2, 0; + dim_u32 $s1; + + ld_kernarg_rwimg $d2, [%image]; + ld_kernarg_align(8)_width(all)_u64 $d0, [%error]; + and_b32 $s14, $s14, 0x00000000; + + //Load the cmp_mask value + ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask]; + ld_global_u32 $s12, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region]; + ld_global_u32 $s1, [$d1]; + ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region]; + ld_global_u32 $s2, [$d1]; + + workitemabsid_u32 $s0, 0; + //If coord < start_region.x the coord is out of the region + cmp_lt_b1_u32 $c0, $s0, $s1; + cbr_b1 $c0, @BB0_1; + //If coord > end_region.x the coord is out of the region + cmp_gt_b1_u32 $c0, $s0, $s2; + cbr_b1 $c0, @BB0_1; + + //Specify the comparison component values from rgn_values + ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values]; + br @BB0_2; + +@BB0_1: + //Specify the comparison component values from bkg_values + 
ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values]; + +@BB0_2: + + //Load the comparison component values from the selected location + ld_v4_global_s32 ($s5, $s6, $s7, $s8), [$d1]; + + //Load the pixel component values + ldimage_v4_1d_s32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, $s0; + + //Check the red channel cmp_mask + and_b32 $s13, $s12, 0x1000; + cmp_ne_b1_b32 $c0, $s13, 0x1000; + cbr_b1 $c0, @BB0_3; + //Compare the red image component with the loaded value + cmp_eq_b1_s32 $c0, $s1, $s5; + cbr_b1 $c0, @BB0_3; + //Set the bit in the red component error field + or_b32 $s14, $s14, 0x1000; + +@BB0_3: + + //Check the green channel cmp_mask + and_b32 $s13, $s12, 0x0100; + cmp_ne_b1_b32 $c0, $s13, 0x0100; + cbr_b1 $c0, @BB0_4; + //Compare the green image component with the loaded value + cmp_eq_b1_s32 $c0, $s2, $s6; + cbr_b1 $c0, @BB0_4; + //Set the bit in the green component error field + or_b32 $s14, $s14, 0x0100; + +@BB0_4: + + //Check the blue channel cmp_mask + and_b32 $s13, $s12, 0x0010; + cmp_ne_b1_b32 $c0, $s13, 0x0010; + cbr_b1 $c0, @BB0_5; + //Compare the blue image component with the loaded value + cmp_eq_b1_s32 $c0, $s3, $s7; + cbr_b1 $c0, @BB0_5; + //Set the bit in the blue component error field + or_b32 $s14, $s14, 0x0010; + +@BB0_5: + + //Check the alpha channel cmp_mask + and_b32 $s13, $s12, 0x0001; + cmp_ne_b1_b32 $c0, $s13, 0x0001; + cbr_b1 $c0, @BB0_6; + //Compare the alpha image component with the loaded value + cmp_eq_b1_s32 $c0, $s4, $s8; + cbr_b1 $c0, @BB0_6; + //Set the bit in the alpha output error field. 
+ or_b32 $s14, $s14, 0x0001; + +@BB0_6: + st_global_u32 $s14, [$d0]; + + ret; +};

// ---------------------------------------------------------------------------
// Region verification kernels, s32 channel type.
//
// Register use shared by every kernel below:
//   $d0  address loaded from %error      $d2  image handle from %image
//   $d1  scratch pointer                 $s12 cmp_mask word
//   $s14 error bits accumulated by this work-item
//   $s0 / $s9 / $s10  absolute work-item id in dimension 0 / 1 / 2
//
// A work-item whose id lies inside [start_region, end_region] compares its
// pixel with the values at %rgn_values, otherwise with those at %bkg_values.
// Channel bits used in both cmp_mask and the error word:
//   0x1000 red (or depth), 0x0100 green, 0x0010 blue, 0x0001 alpha.
//
// NOTE(review): every work-item stores its $s14 to the same address [$d0]
// with a plain store; a work-item that found no mismatch stores 0 there.
// Confirm that the host side tolerates this.
// ---------------------------------------------------------------------------

prog kernel &__verify_image_region_kernel_s32_1db(
    kernarg_rwimg %image,
    kernarg_u64 %rgn_values,
    kernarg_u64 %bkg_values,
    kernarg_u64 %start_region,
    kernarg_u64 %end_region,
    kernarg_u64 %bits,
    kernarg_u64 %cmp_mask,
    kernarg_u64 %error)
{
@__verify_image_region_kernel_s32_1db_entry:
    mov_b64 $d2, 0;
    dim_u32 $s1;

    ld_kernarg_rwimg $d2, [%image];
    ld_kernarg_align(8)_width(all)_u64 $d0, [%error];
    // Start with no error bits set
    mov_b32 $s14, 0;

    // Fetch the cmp_mask word and the one-component region bounds
    ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask];
    ld_global_u32 $s12, [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region];
    ld_global_u32 $s1, [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region];
    ld_global_u32 $s2, [$d1];

    // x below start_region.x or above end_region.x => outside the region
    workitemabsid_u32 $s0, 0;
    cmp_lt_b1_u32 $c0, $s0, $s1;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s0, $s2;
    cbr_b1 $c0, @outside_region;

    // Inside: expected values come from rgn_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values];
    br @compare;

@outside_region:
    // Outside: expected values come from bkg_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values];

@compare:
    // Expected components, then the pixel actually stored in the image
    ld_v4_global_s32 ($s5, $s6, $s7, $s8), [$d1];
    ldimage_v4_1db_s32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, $s0;

    // Red: skipped unless its mask bit is set; mismatch sets error bit 0x1000
    and_b32 $s13, $s12, 0x1000;
    cmp_ne_b1_b32 $c0, $s13, 0x1000;
    cbr_b1 $c0, @check_green;
    cmp_eq_b1_s32 $c0, $s1, $s5;
    cbr_b1 $c0, @check_green;
    or_b32 $s14, $s14, 0x1000;

@check_green:
    // Green: mask / error bit 0x0100
    and_b32 $s13, $s12, 0x0100;
    cmp_ne_b1_b32 $c0, $s13, 0x0100;
    cbr_b1 $c0, @check_blue;
    cmp_eq_b1_s32 $c0, $s2, $s6;
    cbr_b1 $c0, @check_blue;
    or_b32 $s14, $s14, 0x0100;

@check_blue:
    // Blue: mask / error bit 0x0010
    and_b32 $s13, $s12, 0x0010;
    cmp_ne_b1_b32 $c0, $s13, 0x0010;
    cbr_b1 $c0, @check_alpha;
    cmp_eq_b1_s32 $c0, $s3, $s7;
    cbr_b1 $c0, @check_alpha;
    or_b32 $s14, $s14, 0x0010;

@check_alpha:
    // Alpha: mask / error bit 0x0001
    and_b32 $s13, $s12, 0x0001;
    cmp_ne_b1_b32 $c0, $s13, 0x0001;
    cbr_b1 $c0, @store_result;
    cmp_eq_b1_s32 $c0, $s4, $s8;
    cbr_b1 $c0, @store_result;
    or_b32 $s14, $s14, 0x0001;

@store_result:
    st_global_u32 $s14, [$d0];

    ret;
};

prog kernel &__verify_image_region_kernel_s32_1da(
    kernarg_rwimg %image,
    kernarg_u64 %rgn_values,
    kernarg_u64 %bkg_values,
    kernarg_u64 %start_region,
    kernarg_u64 %end_region,
    kernarg_u64 %bits,
    kernarg_u64 %cmp_mask,
    kernarg_u64 %error)
{
@__verify_image_region_kernel_s32_1da_entry:
    mov_b64 $d2, 0;
    dim_u32 $s1;

    ld_kernarg_rwimg $d2, [%image];
    ld_kernarg_align(8)_width(all)_u64 $d0, [%error];
    // Start with no error bits set
    mov_b32 $s14, 0;

    // Fetch the cmp_mask word and the two-component region bounds
    ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask];
    ld_global_u32 $s12, [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region];
    ld_v2_global_u32 ($s1, $s2), [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region];
    ld_v2_global_u32 ($s3, $s4), [$d1];

    // x below start_region.x or above end_region.x => outside the region
    workitemabsid_u32 $s0, 0;
    cmp_lt_b1_u32 $c0, $s0, $s1;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s0, $s3;
    cbr_b1 $c0, @outside_region;

    // Second coordinate is the array index; same range test against .y
    workitemabsid_u32 $s9, 1;
    cmp_lt_b1_u32 $c0, $s9, $s2;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s9, $s4;
    cbr_b1 $c0, @outside_region;

    // Inside: expected values come from rgn_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values];
    br @compare;

@outside_region:
    // Outside: expected values come from bkg_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values];

@compare:
    // Expected components, then the pixel actually stored in the image
    ld_v4_global_s32 ($s5, $s6, $s7, $s8), [$d1];
    ldimage_v4_1da_s32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9);

    // Red: skipped unless its mask bit is set; mismatch sets error bit 0x1000
    and_b32 $s13, $s12, 0x1000;
    cmp_ne_b1_b32 $c0, $s13, 0x1000;
    cbr_b1 $c0, @check_green;
    cmp_eq_b1_s32 $c0, $s1, $s5;
    cbr_b1 $c0, @check_green;
    or_b32 $s14, $s14, 0x1000;

@check_green:
    // Green: mask / error bit 0x0100
    and_b32 $s13, $s12, 0x0100;
    cmp_ne_b1_b32 $c0, $s13, 0x0100;
    cbr_b1 $c0, @check_blue;
    cmp_eq_b1_s32 $c0, $s2, $s6;
    cbr_b1 $c0, @check_blue;
    or_b32 $s14, $s14, 0x0100;

@check_blue:
    // Blue: mask / error bit 0x0010
    and_b32 $s13, $s12, 0x0010;
    cmp_ne_b1_b32 $c0, $s13, 0x0010;
    cbr_b1 $c0, @check_alpha;
    cmp_eq_b1_s32 $c0, $s3, $s7;
    cbr_b1 $c0, @check_alpha;
    or_b32 $s14, $s14, 0x0010;

@check_alpha:
    // Alpha: mask / error bit 0x0001
    and_b32 $s13, $s12, 0x0001;
    cmp_ne_b1_b32 $c0, $s13, 0x0001;
    cbr_b1 $c0, @store_result;
    cmp_eq_b1_s32 $c0, $s4, $s8;
    cbr_b1 $c0, @store_result;
    or_b32 $s14, $s14, 0x0001;

@store_result:
    st_global_u32 $s14, [$d0];

    ret;
};

prog kernel &__verify_image_region_kernel_s32_2d(
    kernarg_rwimg %image,
    kernarg_u64 %rgn_values,
    kernarg_u64 %bkg_values,
    kernarg_u64 %start_region,
    kernarg_u64 %end_region,
    kernarg_u64 %bits,
    kernarg_u64 %cmp_mask,
    kernarg_u64 %error)
{
@__verify_image_region_kernel_s32_2d_entry:
    mov_b64 $d2, 0;
    dim_u32 $s1;

    ld_kernarg_rwimg $d2, [%image];
    ld_kernarg_align(8)_width(all)_u64 $d0, [%error];
    // Start with no error bits set
    mov_b32 $s14, 0;

    // Fetch the cmp_mask word and the two-component region bounds
    ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask];
    ld_global_u32 $s12, [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region];
    ld_v2_global_u32 ($s1, $s2), [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region];
    ld_v2_global_u32 ($s3, $s4), [$d1];

    // x below start_region.x or above end_region.x => outside the region
    workitemabsid_u32 $s0, 0;
    cmp_lt_b1_u32 $c0, $s0, $s1;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s0, $s3;
    cbr_b1 $c0, @outside_region;

    // y below start_region.y or above end_region.y => outside the region
    workitemabsid_u32 $s9, 1;
    cmp_lt_b1_u32 $c0, $s9, $s2;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s9, $s4;
    cbr_b1 $c0, @outside_region;

    // Inside: expected values come from rgn_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values];
    br @compare;

@outside_region:
    // Outside: expected values come from bkg_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values];

@compare:
    // Expected components, then the pixel actually stored in the image
    ld_v4_global_s32 ($s5, $s6, $s7, $s8), [$d1];
    ldimage_v4_2d_s32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9);

    // Red: skipped unless its mask bit is set; mismatch sets error bit 0x1000
    and_b32 $s13, $s12, 0x1000;
    cmp_ne_b1_b32 $c0, $s13, 0x1000;
    cbr_b1 $c0, @check_green;
    cmp_eq_b1_s32 $c0, $s1, $s5;
    cbr_b1 $c0, @check_green;
    or_b32 $s14, $s14, 0x1000;

@check_green:
    // Green: mask / error bit 0x0100
    and_b32 $s13, $s12, 0x0100;
    cmp_ne_b1_b32 $c0, $s13, 0x0100;
    cbr_b1 $c0, @check_blue;
    cmp_eq_b1_s32 $c0, $s2, $s6;
    cbr_b1 $c0, @check_blue;
    or_b32 $s14, $s14, 0x0100;

@check_blue:
    // Blue: mask / error bit 0x0010
    and_b32 $s13, $s12, 0x0010;
    cmp_ne_b1_b32 $c0, $s13, 0x0010;
    cbr_b1 $c0, @check_alpha;
    cmp_eq_b1_s32 $c0, $s3, $s7;
    cbr_b1 $c0, @check_alpha;
    or_b32 $s14, $s14, 0x0010;

@check_alpha:
    // Alpha: mask / error bit 0x0001
    and_b32 $s13, $s12, 0x0001;
    cmp_ne_b1_b32 $c0, $s13, 0x0001;
    cbr_b1 $c0, @store_result;
    cmp_eq_b1_s32 $c0, $s4, $s8;
    cbr_b1 $c0, @store_result;
    or_b32 $s14, $s14, 0x0001;

@store_result:
    st_global_u32 $s14, [$d0];

    ret;
};

prog kernel &__verify_image_region_kernel_s32_2ddepth(
    kernarg_rwimg %image,
    kernarg_u64 %rgn_values,
    kernarg_u64 %bkg_values,
    kernarg_u64 %start_region,
    kernarg_u64 %end_region,
    kernarg_u64 %bits,
    kernarg_u64 %cmp_mask,
    kernarg_u64 %error)
{
@__verify_image_region_kernel_s32_2ddepth_entry:
    mov_b64 $d2, 0;
    dim_u32 $s1;

    ld_kernarg_rwimg $d2, [%image];
    ld_kernarg_align(8)_width(all)_u64 $d0, [%error];
    // Start with no error bits set
    mov_b32 $s14, 0;

    // Fetch the cmp_mask word and the two-component region bounds
    ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask];
    ld_global_u32 $s12, [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region];
    ld_v2_global_u32 ($s1, $s2), [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region];
    ld_v2_global_u32 ($s3, $s4), [$d1];

    // x below start_region.x or above end_region.x => outside the region
    workitemabsid_u32 $s0, 0;
    cmp_lt_b1_u32 $c0, $s0, $s1;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s0, $s3;
    cbr_b1 $c0, @outside_region;

    // y below start_region.y or above end_region.y => outside the region
    workitemabsid_u32 $s9, 1;
    cmp_lt_b1_u32 $c0, $s9, $s2;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s9, $s4;
    cbr_b1 $c0, @outside_region;

    // Inside: expected value comes from rgn_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values];
    br @compare;

@outside_region:
    // Outside: expected value comes from bkg_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values];

@compare:
    // Single expected depth value, then the depth stored in the image
    ld_global_s32 $s5, [$d1];
    ldimage_2ddepth_s32_rwimg_u32 $s1, $d2, ($s0, $s9);

    // Depth: skipped unless mask bit 0x1000 is set; mismatch sets 0x1000
    and_b32 $s13, $s12, 0x1000;
    cmp_ne_b1_b32 $c0, $s13, 0x1000;
    cbr_b1 $c0, @store_result;
    cmp_eq_b1_s32 $c0, $s1, $s5;
    cbr_b1 $c0, @store_result;
    or_b32 $s14, $s14, 0x1000;

@store_result:
    st_global_u32 $s14, [$d0];

    ret;
};

prog kernel &__verify_image_region_kernel_s32_2da(
    kernarg_rwimg %image,
    kernarg_u64 %rgn_values,
    kernarg_u64 %bkg_values,
    kernarg_u64 %start_region,
    kernarg_u64 %end_region,
    kernarg_u64 %bits,
    kernarg_u64 %cmp_mask,
    kernarg_u64 %error)
{
@__verify_image_region_kernel_s32_2da_entry:
    mov_b64 $d2, 0;
    dim_u32 $s1;

    // NOTE(review): this ret comes before every load and store, so nothing
    // below it executes and this kernel never writes to %error. Confirm
    // whether the verification was switched off on purpose.
    ret;

    ld_kernarg_rwimg $d2, [%image];
    ld_kernarg_align(8)_width(all)_u64 $d0, [%error];
    // Start with no error bits set
    mov_b32 $s14, 0;

    // Fetch the cmp_mask word and the three-component region bounds
    ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask];
    ld_global_u32 $s12, [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region];
    ld_v3_global_u32 ($s1,$s2,$s3), [$d1];

    ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region];
    ld_v3_global_u32 ($s4,$s5,$s6), [$d1];

    // x below start_region.x or above end_region.x => outside the region
    workitemabsid_u32 $s0, 0;
    cmp_lt_b1_u32 $c0, $s0, $s1;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s0, $s4;
    cbr_b1 $c0, @outside_region;

    // y below start_region.y or above end_region.y => outside the region
    workitemabsid_u32 $s9, 1;
    cmp_lt_b1_u32 $c0, $s9, $s2;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s9, $s5;
    cbr_b1 $c0, @outside_region;

    // z below start_region.z or above end_region.z => outside the region
    workitemabsid_u32 $s10, 2;
    cmp_lt_b1_u32 $c0, $s10, $s3;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s10, $s6;
    cbr_b1 $c0, @outside_region;

    // Inside: expected values come from rgn_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values];
    br @compare;

@outside_region:
    // Outside: expected values come from bkg_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values];

@compare:
    // Expected components (reusing $s5/$s6 now that the bounds are done),
    // then the pixel actually stored in the image
    ld_v4_global_s32 ($s5, $s6, $s7, $s8), [$d1];
    ldimage_v4_2da_s32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9, $s10);

    // Red: skipped unless its mask bit is set; mismatch sets error bit 0x1000
    and_b32 $s13, $s12, 0x1000;
    cmp_ne_b1_b32 $c0, $s13, 0x1000;
    cbr_b1 $c0, @check_green;
    cmp_eq_b1_s32 $c0, $s1, $s5;
    cbr_b1 $c0, @check_green;
    or_b32 $s14, $s14, 0x1000;

@check_green:
    // Green: mask / error bit 0x0100
    and_b32 $s13, $s12, 0x0100;
    cmp_ne_b1_b32 $c0, $s13, 0x0100;
    cbr_b1 $c0, @check_blue;
    cmp_eq_b1_s32 $c0, $s2, $s6;
    cbr_b1 $c0, @check_blue;
    or_b32 $s14, $s14, 0x0100;

@check_blue:
    // Blue: mask / error bit 0x0010
    and_b32 $s13, $s12, 0x0010;
    cmp_ne_b1_b32 $c0, $s13, 0x0010;
    cbr_b1 $c0, @check_alpha;
    cmp_eq_b1_s32 $c0, $s3, $s7;
    cbr_b1 $c0, @check_alpha;
    or_b32 $s14, $s14, 0x0010;

@check_alpha:
    // Alpha: mask / error bit 0x0001
    and_b32 $s13, $s12, 0x0001;
    cmp_ne_b1_b32 $c0, $s13, 0x0001;
    cbr_b1 $c0, @store_result;
    cmp_eq_b1_s32 $c0, $s4, $s8;
    cbr_b1 $c0, @store_result;
    or_b32 $s14, $s14, 0x0001;

@store_result:
    st_global_u32 $s14, [$d0];

    ret;
};

prog kernel &__verify_image_region_kernel_s32_2dadepth(
    kernarg_rwimg %image,
    kernarg_u64 %rgn_values,
    kernarg_u64 %bkg_values,
    kernarg_u64 %start_region,
    kernarg_u64 %end_region,
    kernarg_u64 %bits,
    kernarg_u64 %cmp_mask,
    kernarg_u64 %error)
{
@__verify_image_region_kernel_s32_2dadepth_entry:
    mov_b64 $d2, 0;
    dim_u32 $s1;

    // NOTE(review): this ret comes before every load and store, so nothing
    // below it executes and this kernel never writes to %error. Confirm
    // whether the verification was switched off on purpose.
    ret;

    ld_kernarg_rwimg $d2, [%image];
    ld_kernarg_align(8)_width(all)_u64 $d0, [%error];
    // Start with no error bits set
    mov_b32 $s14, 0;

    // Fetch the cmp_mask word and the three-component region bounds
    ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask];
    ld_global_u32 $s12, [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region];
    ld_v3_global_u32 ($s1,$s2,$s3), [$d1];

    ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region];
    ld_v3_global_u32 ($s4,$s5,$s6), [$d1];

    // x below start_region.x or above end_region.x => outside the region
    workitemabsid_u32 $s0, 0;
    cmp_lt_b1_u32 $c0, $s0, $s1;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s0, $s4;
    cbr_b1 $c0, @outside_region;

    // y below start_region.y or above end_region.y => outside the region
    workitemabsid_u32 $s9, 1;
    cmp_lt_b1_u32 $c0, $s9, $s2;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s9, $s5;
    cbr_b1 $c0, @outside_region;

    // z below start_region.z or above end_region.z => outside the region
    workitemabsid_u32 $s10, 2;
    cmp_lt_b1_u32 $c0, $s10, $s3;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s10, $s6;
    cbr_b1 $c0, @outside_region;

    // Inside: expected value comes from rgn_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values];
    br @compare;

@outside_region:
    // Outside: expected value comes from bkg_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values];

@compare:
    // Single expected depth value, then the depth stored in the image
    ld_global_s32 $s5, [$d1];
    ldimage_2dadepth_s32_rwimg_u32 $s1, $d2, ($s0, $s9, $s10);

    // Depth: skipped unless mask bit 0x1000 is set; mismatch sets 0x1000
    and_b32 $s13, $s12, 0x1000;
    cmp_ne_b1_b32 $c0, $s13, 0x1000;
    cbr_b1 $c0, @store_result;
    cmp_eq_b1_s32 $c0, $s1, $s5;
    cbr_b1 $c0, @store_result;
    or_b32 $s14, $s14, 0x1000;

@store_result:
    st_global_u32 $s14, [$d0];

    ret;
};

prog kernel &__verify_image_region_kernel_s32_3d(
    kernarg_rwimg %image,
    kernarg_u64 %rgn_values,
    kernarg_u64 %bkg_values,
    kernarg_u64 %start_region,
    kernarg_u64 %end_region,
    kernarg_u64 %bits,
    kernarg_u64 %cmp_mask,
    kernarg_u64 %error)
{
@__verify_image_region_kernel_s32_3d_entry:
    mov_b64 $d2, 0;
    dim_u32 $s1;

    // NOTE(review): this ret comes before every load and store, so nothing
    // below it executes and this kernel never writes to %error. Confirm
    // whether the verification was switched off on purpose.
    ret;

    ld_kernarg_rwimg $d2, [%image];
    ld_kernarg_align(8)_width(all)_u64 $d0, [%error];
    // Start with no error bits set
    mov_b32 $s14, 0;

    // Fetch the cmp_mask word and the three-component region bounds
    ld_kernarg_align(8)_width(all)_u64 $d1, [%cmp_mask];
    ld_global_u32 $s12, [$d1];
    ld_kernarg_align(4)_width(all)_u64 $d1, [%start_region];
    ld_v3_global_u32 ($s1,$s2,$s3), [$d1];

    ld_kernarg_align(4)_width(all)_u64 $d1, [%end_region];
    ld_v3_global_u32 ($s4,$s5,$s6), [$d1];

    // x below start_region.x or above end_region.x => outside the region
    workitemabsid_u32 $s0, 0;
    cmp_lt_b1_u32 $c0, $s0, $s1;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s0, $s4;
    cbr_b1 $c0, @outside_region;

    // y below start_region.y or above end_region.y => outside the region
    workitemabsid_u32 $s9, 1;
    cmp_lt_b1_u32 $c0, $s9, $s2;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s9, $s5;
    cbr_b1 $c0, @outside_region;

    // z below start_region.z or above end_region.z => outside the region
    workitemabsid_u32 $s10, 2;
    cmp_lt_b1_u32 $c0, $s10, $s3;
    cbr_b1 $c0, @outside_region;
    cmp_gt_b1_u32 $c0, $s10, $s6;
    cbr_b1 $c0, @outside_region;

    // Inside: expected values come from rgn_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%rgn_values];
    br @compare;

@outside_region:
    // Outside: expected values come from bkg_values
    ld_kernarg_align(8)_width(all)_u64 $d1, [%bkg_values];

@compare:
    // Expected components (reusing $s5/$s6 now that the bounds are done),
    // then the pixel actually stored in the image
    ld_v4_global_s32 ($s5, $s6, $s7, $s8), [$d1];
    ldimage_v4_3d_s32_rwimg_u32 ($s1, $s2, $s3, $s4), $d2, ($s0, $s9, $s10);

    // Red: skipped unless its mask bit is set; mismatch sets error bit 0x1000
    and_b32 $s13, $s12, 0x1000;
    cmp_ne_b1_b32 $c0, $s13, 0x1000;
    cbr_b1 $c0, @check_green;
    cmp_eq_b1_s32 $c0, $s1, $s5;
    cbr_b1 $c0, @check_green;
    or_b32 $s14, $s14, 0x1000;

@check_green:
    // Green: mask / error bit 0x0100
    and_b32 $s13, $s12, 0x0100;
    cmp_ne_b1_b32 $c0, $s13, 0x0100;
    cbr_b1 $c0, @check_blue;
    cmp_eq_b1_s32 $c0, $s2, $s6;
    cbr_b1 $c0, @check_blue;
    or_b32 $s14, $s14, 0x0100;

@check_blue:
    // Blue: mask / error bit 0x0010
    and_b32 $s13, $s12, 0x0010;
    cmp_ne_b1_b32 $c0, $s13, 0x0010;
    cbr_b1 $c0, @check_alpha;
    cmp_eq_b1_s32 $c0, $s3, $s7;
    cbr_b1 $c0, @check_alpha;
    or_b32 $s14, $s14, 0x0010;

@check_alpha:
    // Alpha: mask / error bit 0x0001
    and_b32 $s13, $s12, 0x0001;
    cmp_ne_b1_b32 $c0, $s13, 0x0001;
    cbr_b1 $c0, @store_result;
    cmp_eq_b1_s32 $c0, $s4, $s8;
    cbr_b1 $c0, @store_result;
    or_b32 $s14, $s14, 0x0001;

@store_result:
    st_global_u32 $s14, [$d0];

    ret;
};
diff --git a/src/utils/agent_utils.c b/src/utils/agent_utils.c new file mode 100644 index 0000000..1b165cd --- /dev/null +++ b/src/utils/agent_utils.c @@ -0,0 +1,450 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of <Name of Development Group, Name of Institution>, + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include +#include "agent_utils.h" +#include "framework.h" + +// Helper functions +char isPowerOfTwo(uint32_t x) { + return (x && ((x & (x-1)) == 0)); +} + +// Functions that check that reported attributes are expected +void check_system_info() { + hsa_status_t status; + uint16_t version_major; + status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &version_major); + ASSERT(status == HSA_STATUS_SUCCESS); + + uint16_t version_minor; + status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &version_minor); + ASSERT(status == HSA_STATUS_SUCCESS); + + uint64_t timestamp1, timestamp2; + status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, ×tamp1); + ASSERT(status == HSA_STATUS_SUCCESS); + sleep(1); + status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, ×tamp2); + ASSERT(status == HSA_STATUS_SUCCESS); + ASSERT_MSG(timestamp2 > timestamp1, "Error: timestamp1: %u is not less than timestamp2: %u\n", timestamp1, timestamp2); + + uint16_t timestamp_freq; + status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, ×tamp_freq); + ASSERT(status == HSA_STATUS_SUCCESS); + // Disable value validation for HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY + // ASSERT_MSG(timestamp_freq >= 1 && timestamp_freq <= 400, "Invalid timestamp frequency: %u\n", timestamp_freq); + + uint64_t signal_max_wait; + status = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, &signal_max_wait); + ASSERT(status == HSA_STATUS_SUCCESS); + + return; +} + +void check_agent_info(hsa_agent_t agent) { + hsa_status_t status; + char name[64]; + int ii; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name); + ASSERT(HSA_STATUS_SUCCESS == status); + + char vendor_name[64]; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, vendor_name); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t feature = 0; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &feature); + ASSERT(HSA_STATUS_SUCCESS == status); + + if 
(HSA_AGENT_FEATURE_KERNEL_DISPATCH == feature) { + uint32_t wavefront_size = 0; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT_MSG((wavefront_size >= 1) && (wavefront_size <= 256) && isPowerOfTwo(wavefront_size), "Error: wavefront_size = %u", wavefront_size); + + uint32_t workgroup_max_size; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &workgroup_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT_MSG(workgroup_max_size > 0, "Error: workgroup_max_size = %u", workgroup_max_size); + + uint16_t workgroup_max_dim[3]; + workgroup_max_dim[0] = 0; + workgroup_max_dim[1] = 0; + workgroup_max_dim[2] = 0; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM, workgroup_max_dim); + ASSERT(HSA_STATUS_SUCCESS == status); + for (ii = 0; ii < 3; ii++) { + ASSERT_MSG((workgroup_max_dim[ii] > 0 && workgroup_max_dim[ii] <= workgroup_max_size), "Error: workgroup_max_dim[%d] = %u", ii, workgroup_max_dim[ii]); + } + + uint32_t grid_max_size; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &grid_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(grid_max_size > 0); + + hsa_dim3_t grid_max_dim; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(grid_max_dim.x > 0 && grid_max_dim.x >= workgroup_max_dim[0] && grid_max_dim.x <= grid_max_size); + ASSERT(grid_max_dim.y > 0 && grid_max_dim.y >= workgroup_max_dim[1] && grid_max_dim.y <= grid_max_size); + ASSERT(grid_max_dim.z > 0 && grid_max_dim.z >= workgroup_max_dim[2] && grid_max_dim.z <= grid_max_size); + + uint32_t fbarriers_max_size; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &fbarriers_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT_MSG(fbarriers_max_size >= 32, "Error: fbarriers_max_size = %u", fbarriers_max_size); + + uint32_t queues_max; + status = 
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &queues_max); + ASSERT(HSA_STATUS_SUCCESS == status); + + uint32_t queue_max_size; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max_size); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(queue_max_size > 0 && isPowerOfTwo(queue_max_size)); + } + + uint32_t node; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &node); + ASSERT(HSA_STATUS_SUCCESS == status); + + hsa_device_type_t device; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(device == HSA_DEVICE_TYPE_GPU || device == HSA_DEVICE_TYPE_CPU || HSA_DEVICE_TYPE_DSP); + + uint32_t cache_size[4]; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, cache_size); + ASSERT(HSA_STATUS_SUCCESS == status); + + if (device == HSA_DEVICE_TYPE_GPU) { + uint32_t image1d_max_elems; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS, &image1d_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image1d_max_elems <= 16384); + + uint32_t image1da_max_elems; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS, &image1da_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image1da_max_elems <= 16384); + + uint32_t image1db_max_elems; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS, &image1db_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image1db_max_elems <= 65536); + + uint32_t image2d_max_elems[2]; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS, image2d_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image2d_max_elems[0] <= 16384 && image2d_max_elems[1] <= 16384); + + uint32_t image2da_max_elems[2]; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS, image2da_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image2da_max_elems[0] <= 16384 && 
image2da_max_elems[1] <= 16384); + + uint32_t image2dd_max_elems[2]; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS, image2dd_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image2dd_max_elems[0] <= 16384 && image2dd_max_elems[1] <= 16384); + + uint32_t image2dad_max_elems[2]; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS, image2dad_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image2dad_max_elems[0] <= 16384 && image2dad_max_elems[1] <= 16384); + + uint32_t image3d_max_elems[3]; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS, image3d_max_elems); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image3d_max_elems[0] <= 2048 && image3d_max_elems[1] <= 2048 && image3d_max_elems[2] <= 2048); + + uint32_t image_array_max_layers; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS, &image_array_max_layers); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image_array_max_layers <= 2048); + + uint32_t image_rd_max; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES, &image_rd_max); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image_rd_max >= 128); + + uint32_t image_rorw_max; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES, &image_rorw_max); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(image_rorw_max >= 64); + + uint32_t sampler_max; + status = hsa_agent_get_info(agent, HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS, &sampler_max); + ASSERT(HSA_STATUS_SUCCESS == status); + ASSERT(sampler_max >= 16); + } + + return; +} + +// Callback function to get the number of agents +hsa_status_t get_num_agents(hsa_agent_t agent, void* data) { + int *num_agents = (int *) data; + (*num_agents)++; + + return HSA_STATUS_SUCCESS; +} + +// Callback function to get the list of agents +hsa_status_t get_agents(hsa_agent_t agent, void* data) { + hsa_agent_t **agent_list = 
(hsa_agent_t **) data;
+    **agent_list = agent;
+    (*agent_list)++;
+
+    return HSA_STATUS_SUCCESS;
+}
+
+// Collect every agent reported by hsa_iterate_agents into a heap-allocated
+// array stored in agent_list. The caller releases it with free_agent_list.
+void get_agent_list(struct agent_list_s *agent_list) {
+    size_t num_agents = 0;
+    hsa_status_t status;
+
+    // First pass: count the agents
+    status = hsa_iterate_agents(get_num_agents, &num_agents);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create an array of size num_agents to store the agent list.
+    // malloc(0) may return NULL, so storage is only required when the
+    // second pass is actually going to write an entry.
+    agent_list->agents = (hsa_agent_t*) malloc(sizeof(hsa_agent_t) * num_agents);
+    ASSERT(0 == num_agents || NULL != agent_list->agents);
+
+    // Second pass: get_agents stores one agent per call and advances the
+    // cursor it is given, so pass a copy of the base pointer.
+    hsa_agent_t *agent_iter = agent_list->agents;
+    status = hsa_iterate_agents(get_agents, &agent_iter);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    agent_list->num_agents = num_agents;
+
+    return;
+}
+
+// Release the array owned by an agent list and leave the list empty.
+void free_agent_list(struct agent_list_s *agent_list) {
+    agent_list->num_agents = 0;
+    free(agent_list->agents);
+    // Clear the pointer so a stale read or a second free is harmless.
+    agent_list->agents = NULL;
+    return;
+}
+
+// Callbacks that get specific agent types.
+// Each one writes the first matching agent to *data (an hsa_agent_t) and
+// returns HSA_STATUS_INFO_BREAK; otherwise it returns HSA_STATUS_SUCCESS.
+// An agent whose attribute query fails is treated as a non-match.
+hsa_status_t get_cpu_agent(hsa_agent_t agent, void* data) {
+    hsa_status_t status;
+    hsa_device_type_t device_type;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+    if (HSA_STATUS_SUCCESS == status && HSA_DEVICE_TYPE_CPU == device_type) {
+        hsa_agent_t* ret = (hsa_agent_t*)data;
+        *ret = agent;
+        return HSA_STATUS_INFO_BREAK;
+    }
+
+    return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t get_gpu_agent(hsa_agent_t agent, void* data) {
+    hsa_status_t status;
+    hsa_device_type_t device_type;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+    if (HSA_STATUS_SUCCESS == status && HSA_DEVICE_TYPE_GPU == device_type) {
+        hsa_agent_t* ret = (hsa_agent_t*)data;
+        *ret = agent;
+        return HSA_STATUS_INFO_BREAK;
+    }
+
+    return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t get_kernel_dispatch_agent(hsa_agent_t agent, void* data) {
+    // callback function to get the first agent that supports kernel dispatch
+    hsa_status_t status;
+    hsa_agent_feature_t feature;
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &feature);
+    // The feature attribute is a bit-mask: an agent may advertise kernel
+    // dispatch together with other features, so test the bit instead of
+    // comparing the whole value.
+    if (HSA_STATUS_SUCCESS == status &&
+        0 != (feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
+        hsa_agent_t* ret = (hsa_agent_t*)data;
+        *ret = agent;
+        return HSA_STATUS_INFO_BREAK;
+    }
+
+    return HSA_STATUS_SUCCESS;
+}
+
+// Callback that checks an agent attributes are valid
+hsa_status_t check_agent(hsa_agent_t agent, void* data) {
+    // Check the attributes of the agent
+    check_agent_info(agent);
+
+    // Keep iterating
+    return HSA_STATUS_SUCCESS;
+}
+
+// Callback function to get the number of an agent's memory regions.
+// data must point to an int counter.
+hsa_status_t get_num_regions(hsa_region_t region, void* data) {
+    int *num_regions = (int *) data;
+    (*num_regions)++;
+
+    return HSA_STATUS_SUCCESS;
+}
+
+// Callback function to get the list of an agent's memory regions.
+// data must point to a cursor (hsa_region_t *) into the destination array;
+// the cursor is advanced by one entry per call.
+hsa_status_t get_regions(hsa_region_t region, void* data) {
+    hsa_region_t **region_list = (hsa_region_t **)data;
+    **region_list = region;
+    (*region_list)++;
+
+    return HSA_STATUS_SUCCESS;
+}
+
+// Collect every memory region of an agent into a heap-allocated array stored
+// in region_list. The caller releases it with free_region_list.
+void get_region_list(hsa_agent_t agent, struct region_list_s* region_list) {
+    // get_num_regions increments through an int*, so count into an int and
+    // widen afterwards instead of handing it the address of a size_t.
+    int num_regions = 0;
+    hsa_status_t status;
+
+    // Get number of regions on the agent
+    status = hsa_agent_iterate_regions(agent, get_num_regions, &num_regions);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Create an array of size num_regions to store the regions list
+    region_list->regions = (hsa_region_t*) malloc(sizeof(hsa_region_t) * (size_t) num_regions);
+    ASSERT(0 == num_regions || NULL != region_list->regions);
+
+    // Get region list
+    hsa_region_t *region_iter = region_list->regions;
+    status = hsa_agent_iterate_regions(agent, get_regions, &region_iter);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    region_list->num_regions = (size_t) num_regions;
+
+    return;
+}
+
+// Release the array owned by a region list and leave the list empty.
+void free_region_list(struct region_list_s* region_list) {
+    region_list->num_regions = 0;
+    free(region_list->regions);
+    // Clear the pointer so a stale read or a second free is harmless.
+    region_list->regions = NULL;
+    return;
+}
+
+// Shared body of the region-selection callbacks below. Stores region in
+// *data (an hsa_region_t) and stops the iteration when the region belongs to
+// segment and, if required_flag is non-zero, has that global flag set.
+// A region whose attributes cannot be queried is treated as a non-match, the
+// same way the agent callbacks above treat a failed query.
+static hsa_status_t select_region(hsa_region_t region, void* data,
+                                  hsa_region_segment_t segment,
+                                  unsigned int required_flag) {
+    hsa_status_t status;
+    hsa_region_segment_t region_segment;
+
+    status = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &region_segment);
+    if (HSA_STATUS_SUCCESS != status || segment != region_segment) {
+        return HSA_STATUS_SUCCESS;
+    }
+
+    if (0 != required_flag) {
+        hsa_region_global_flag_t flags;
+        status = hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
+        if (HSA_STATUS_SUCCESS != status || 0 == (flags & required_flag)) {
+            return HSA_STATUS_SUCCESS;
+        }
+    }
+
+    hsa_region_t* ret = (hsa_region_t*) data;
+    *ret = region;
+    return HSA_STATUS_INFO_BREAK;
+}
+
+// Callbacks to get specific types of memory regions
+hsa_status_t get_kernarg_memory_region(hsa_region_t region, void* data) {
+    return select_region(region, data, HSA_REGION_SEGMENT_GLOBAL,
+                         HSA_REGION_GLOBAL_FLAG_KERNARG);
+}
+
+hsa_status_t get_group_memory_region(hsa_region_t region, void* data) {
+    return select_region(region, data, HSA_REGION_SEGMENT_GROUP, 0);
+}
+
+hsa_status_t get_global_memory_region(hsa_region_t region, void* data) {
+    return select_region(region, data, HSA_REGION_SEGMENT_GLOBAL, 0);
+}
+
+hsa_status_t get_global_memory_region_fine_grained(hsa_region_t region, void* data) {
+    return select_region(region, data, HSA_REGION_SEGMENT_GLOBAL,
+                         HSA_REGION_GLOBAL_FLAG_FINE_GRAINED);
+}
+
+hsa_status_t get_global_memory_region_coarse_grained(hsa_region_t region, void* data) {
+    return select_region(region, data, HSA_REGION_SEGMENT_GLOBAL,
+                         HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED);
+}
diff --git a/src/utils/agent_utils.h b/src/utils/agent_utils.h
new file mode 100644
index 0000000..b14242a
--- /dev/null
+++ b/src/utils/agent_utils.h
@@ -0,0 +1,117 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef _AGENT_UTILS_H_
+#define _AGENT_UTILS_H_
+#include <hsa.h>
+
+// Struct to store agent list information
+struct agent_list_s {
+    // number of agents in the list
+    size_t num_agents;
+    // heap-allocated array of num_agents agent handles
+    hsa_agent_t *agents;
+};
+
+// Check that the system info reported is consistent with specification
+void check_system_info();
+
+// Check that the agent info reported is consistent with specification
+void check_agent_info(hsa_agent_t agent);
+
+// Callback that counts the number of agents
+hsa_status_t get_num_agents(hsa_agent_t agent, void* data);
+
+// Callback that initializes an agent list
+hsa_status_t get_agents(hsa_agent_t agent, void* data);
+
+// Get the list of all agents on the platform
+void get_agent_list(struct agent_list_s *agent_list);
+
+// Free an agent list
+void free_agent_list(struct agent_list_s *agent_list);
+
+// Callback that stores the first gpu agent returned by topology in *data
+hsa_status_t get_gpu_agent(hsa_agent_t agent, void* data);
+
+// Callback that stores the first cpu agent returned by topology in *data
+hsa_status_t get_cpu_agent(hsa_agent_t agent, void* data);
+
+// Callback that stores the first agent that supports kernel dispatch in *data
+hsa_status_t get_kernel_dispatch_agent(hsa_agent_t agent, void* data);
+
+// Callback that checks an agent's information
+hsa_status_t check_agent(hsa_agent_t agent, void* data);
+
+// Struct to store agent memory region list information
+struct region_list_s {
+    // number of regions in the list
+    size_t num_regions;
+    // heap-allocated array of num_regions region handles
+    hsa_region_t* regions;
+};
+
+// Get all of the memory regions associated with an agent
+void get_region_list(hsa_agent_t agent, struct region_list_s* region_list);
+
+// Free a memory region list
+void free_region_list(struct region_list_s* region_list);
+
+// Callback to acquire a kernarg memory region associated with the agent
+hsa_status_t get_kernarg_memory_region(hsa_region_t region, void* data);
+
+// Callback to acquire a group memory region associated with the agent
+hsa_status_t get_group_memory_region(hsa_region_t region, void* data);
+
+// Callback to acquire a global memory region associated with the agent
+hsa_status_t get_global_memory_region(hsa_region_t region, void* data);
+
+// Callback to acquire a fine grained global memory region associated
+// with the agent
+hsa_status_t get_global_memory_region_fine_grained(hsa_region_t region, void* data);
+
+// Callback to acquire a coarse grained global memory region associated
+// with the agent
+hsa_status_t get_global_memory_region_coarse_grained(hsa_region_t region, void* data);
+
+#endif // _AGENT_UTILS_H_
diff --git a/src/utils/concurrent_utils.c b/src/utils/concurrent_utils.c
new file mode 100755
index 0000000..a2c2e6f
--- /dev/null
+++ b/src/utils/concurrent_utils.c
@@ -0,0 +1,311 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "concurrent_utils.h"
+
+/**
+ * @brief worker function is invoked by each thread to execute tests
+ * Initially, all threads are blocked waiting on run_flag. Each time run_flag
+ * is toggled, the worker sets the status of its test to TEST_RUNNING and
+ * executes the test function. After the test function finishes, the status
+ * of the test is changed to TEST_STOP and the worker blocks until run_flag
+ * is toggled again. When exit_flag is set, the worker sets the status to
+ * TEST_FINISHED and terminates.
+ * @param input Pointer to thread_aux data structure, which contains test
+ * function pointer and corresponding args for the test function, and other
+ * auxiliary information, including status of test, number of running tests,
+ * run_flag, exit_flag, etc.
+ */
+
+static void *worker(void *input) {
+    struct thread_aux* thread = (struct thread_aux*)input;
+    // fun_prt is stored as a void*; give it back its callable type.
+    void (*fun_prt)(void *) = (void (*)(void *)) thread->test->fun_prt;
+    // Last value of run_flag this worker has acted on.
+    int run_flag_l = 0;
+    int exit_requested;
+
+    // While loop to repeatedly execute test function
+    while (1) {
+        pthread_mutex_lock(thread->test_mutex);
+        // Blocked to wait run_flag or exit_flag being changed
+        while (*thread->run_flag == run_flag_l && *thread->exit_flag == 0) {
+            pthread_cond_wait(thread->test_cond, thread->test_mutex);
+        }
+        // Read exit_flag while the mutex is still held so the decision is
+        // based on the same state that ended the wait.
+        exit_requested = (*thread->exit_flag != 0);
+        pthread_mutex_unlock(thread->test_mutex);
+
+        if (exit_requested) {
+            // If exit_flag is non-zero, set status of the test to TEST_FINISHED
+            thread->test->status = TEST_FINISHED;
+            pthread_exit(NULL);
+        }
+
+        // Remember the run_flag value that triggered this run
+        run_flag_l = run_flag_l ^ 1;
+
+        thread->test->status = TEST_RUNNING;
+        fun_prt(thread->test->data);
+
+        // After the test function finishes, decrement the number of running
+        // tests under the mutex. When the count reaches 0 all tests are
+        // finished, so broadcast to wake up the master thread. The status is
+        // published before the broadcast so that a master returning from
+        // test_group_wait never observes TEST_RUNNING for a finished test.
+        pthread_mutex_lock(thread->test_mutex);
+        thread->test->status = TEST_STOP;
+        (*(thread->num_running_t))--;
+        if ((*thread->num_running_t) == 0) {
+            pthread_cond_broadcast(thread->test_cond);
+        }
+        pthread_mutex_unlock(thread->test_mutex);
+    }
+    return NULL;
+}
+
+/**
+ * @brief create a test_group data structure, initialize variables in
+ * the test_group structure, allocate a test_list of group_size and
+ * return a pointer to the test_group.
+ * @param group_size The size of test group, i.e., the size of test lists
+ * @return Pointer to the new test_group, or NULL if allocation fails
+ */
+struct test_group *test_group_create(size_t group_size) {
+    struct test_group *new_group = malloc(sizeof(struct test_group));
+    if (NULL == new_group) {
+        return NULL;
+    }
+    // initialize variables in the data structure
+    new_group->group_size = group_size;
+    new_group->n_threads = 0;
+    new_group->num_test = 0;
+    new_group->run_flag = 0;
+    new_group->exit_flag = 0;
+    new_group->num_running_t = 0;
+    // no threads exist until test_group_thread_create is called
+    new_group->tid = NULL;
+    new_group->thread_list = NULL;
+    // malloc test_list array with group_size
+    new_group->test_list = (struct test_aux *)malloc(sizeof(struct test_aux) * group_size);
+    // malloc(0) may return NULL, which is not a failure for an empty group
+    if (NULL == new_group->test_list && 0 != group_size) {
+        free(new_group);
+        return NULL;
+    }
+
+    return new_group;
+}
+
+// Block until every running test of the group has finished.
+void test_group_wait(struct test_group *t_group) {
+    pthread_mutex_lock(&t_group->test_mutex);
+    while (t_group->num_running_t != 0) {
+        pthread_cond_wait(&t_group->test_cond, &t_group->test_mutex);
+    }
+    pthread_mutex_unlock(&t_group->test_mutex);
+
+    return;
+}
+
+// Append num_copy copies of a test (function + data) to the group.
+// Nothing is added when the copies do not fit in the preallocated list.
+void test_group_add(struct test_group *t_group, void *fun_prt, void *data, size_t num_copy) {
+    // num_test never exceeds group_size, so the subtraction cannot wrap,
+    // unlike num_copy + num_test for a very large num_copy.
+    if (num_copy > t_group->group_size - (size_t) t_group->num_test) {
+        fprintf(stderr, "Error beyound group size: %zu, please resize the test_group\n", t_group->group_size);
+        return;
+    }
+
+    struct test_aux *next_slot = t_group->test_list + t_group->num_test;
+    size_t ii;
+    for (ii = 0; ii < num_copy; ii++) {
+        next_slot[ii].fun_prt = fun_prt;
+        next_slot[ii].data = data;
+        next_slot[ii].status = TEST_NOT_STARTED;
+    }
+    t_group->num_test += (int) num_copy;
+
+    return;
+}
+
+// Grow the test list to new_group_size entries. Shrinking is rejected.
+// NOTE(review): worker threads keep pointers into test_list, so resizing
+// after test_group_thread_create looks unsafe if realloc moves the array;
+// confirm callers only resize before creating threads.
+void test_group_resize(struct test_group *t_group, size_t new_group_size) {
+    if (new_group_size < t_group->group_size) {
+        fprintf(stderr, "Error new group_size is smaller than current group_size\n");
+        return;
+    }
+    if (new_group_size == t_group->group_size) {
+        // Nothing to do; this also avoids realloc(ptr, 0) for an empty group.
+        return;
+    }
+
+    struct test_aux *new_test_list;
+    new_test_list = (struct test_aux *)realloc(t_group->test_list, new_group_size * sizeof(struct test_aux));
+    if (NULL == new_test_list) {
+        // realloc leaves the old block intact on failure; keep using it.
+        perror("realloc failed");
+        return;
+    }
+    t_group->group_size = new_group_size;
+    t_group->test_list = new_test_list;
+
+    return;
+}
+
+// Create threads for tests: one worker thread per test currently in the
+// group. If a thread cannot be created, that test and all later ones are
+// marked TEST_ERROR and n_threads is reduced to the number actually created.
+void test_group_thread_create(struct test_group *t_group) {
+    pthread_mutex_init(&(t_group->test_mutex), NULL);
+    pthread_cond_init(&(t_group->test_cond), NULL);
+    pthread_attr_init(&(t_group->attr));
+    pthread_attr_setdetachstate(&(t_group->attr), PTHREAD_CREATE_JOINABLE);
+
+    int n_threads;
+    int ii = 0;
+    int jj;
+
+    n_threads = t_group->n_threads = t_group->num_test;
+    struct thread_aux *thread_list = t_group->thread_list =
+        (struct thread_aux *)malloc(sizeof(struct thread_aux) * n_threads);
+    t_group->tid = (pthread_t*)malloc(sizeof(pthread_t) * n_threads);
+
+    if (n_threads > 0 && (NULL == thread_list || NULL == t_group->tid)) {
+        perror("malloc failed");
+        free(t_group->thread_list);
+        free(t_group->tid);
+        t_group->thread_list = NULL;
+        t_group->tid = NULL;
+        t_group->n_threads = 0;
+        for (jj = 0; jj < n_threads; ++jj) {
+            t_group->test_list[jj].status = TEST_ERROR;
+        }
+        return;
+    }
+
+    for (ii = 0; ii < n_threads; ++ii) {
+        // CPU_ZERO(&thread_list[ii].cpuset);
+        thread_list[ii].tid = ii;
+        thread_list[ii].test = t_group->test_list + ii;
+        thread_list[ii].run_flag = &(t_group->run_flag);
+        thread_list[ii].exit_flag = &(t_group->exit_flag);
+        thread_list[ii].test_mutex = &(t_group->test_mutex);
+        thread_list[ii].test_cond = &(t_group->test_cond);
+        thread_list[ii].num_running_t = &(t_group->num_running_t);
+        // pthread functions return the error number (a positive value) and
+        // do not set errno, so test for non-zero and decode the result.
+        int status = pthread_create(t_group->tid + ii, &(t_group->attr), worker, thread_list + ii);
+        if (status != 0) {
+            fprintf(stderr, "pthread_create failed: %s\n", strerror(status));
+            // No thread exists for this test or the ones after it. Only
+            // the first ii entries of tid are valid to join or cancel.
+            for (jj = ii; jj < n_threads; ++jj) {
+                t_group->test_list[jj].status = TEST_ERROR;
+            }
+            t_group->n_threads = ii;
+            break;
+        }
+    }
+
+    return;
+}
+
+// Return number of test
+int test_group_num_tests(struct test_group *t_group) {
+    return t_group->num_test;
+}
+
+// Set affinity of the specific test
+void test_group_thread_affinity(struct test_group *t_group, int test_id, int cpu_id) {
+/* Setting CPU affinity isn't currently supported.
+ * CPU_SET(cpu_id, &t_group->thread_list[test_id].cpuset);
+ * int status;
+ * status = pthread_setaffinity_np(t_group->tid[test_id],
+ * sizeof(cpu_set_t), &t_group->thread_list[test_id].cpuset);
+ * if (status != 0) {
+ * perror("pthread_setaffinity_np error");
+ * }
+ */
+    return;
+}
+
+// Toggle run_flag to release all workers for one run of their tests.
+// Refuses to start while tests of a previous run are still running.
+void test_group_start(struct test_group *t_group) {
+    pthread_mutex_lock(&t_group->test_mutex);
+    // Checked under the mutex because workers decrement it under the mutex.
+    if (t_group->num_running_t != 0) {
+        printf("Error: %u tests are not finished\n", t_group->num_running_t);
+        pthread_mutex_unlock(&t_group->test_mutex);
+        return;
+    }
+
+    t_group->run_flag = t_group->run_flag ^ 1;
+    // Only existing worker threads can decrement the counter, so count
+    // threads rather than tests; otherwise test_group_wait never returns
+    // when a test has no thread.
+    t_group->num_running_t = t_group->n_threads;
+    pthread_cond_broadcast(&t_group->test_cond);
+    pthread_mutex_unlock(&t_group->test_mutex);
+
+    return;
+}
+
+// Set exit_flag to 1, wait all threads finish and cleanup
+void test_group_exit(struct test_group *t_group) {
+    int ii = 0;
+    int status;
+
+    pthread_mutex_lock(&t_group->test_mutex);
+    t_group->exit_flag = 1;
+    pthread_cond_broadcast(&t_group->test_cond);
+    pthread_mutex_unlock(&t_group->test_mutex);
+
+    for (ii = 0; ii < t_group->n_threads; ++ii) {
+        // pthread_join returns the error number; it does not set errno.
+        status = pthread_join(t_group->tid[ii], 0);
+        if (status != 0) {
+            fprintf(stderr, "pthread_join failed: %s\n", strerror(status));
+            t_group->test_list[ii].status = TEST_ERROR;
+        }
+    }
+
+    pthread_attr_destroy(&(t_group->attr));
+    pthread_mutex_destroy(&(t_group->test_mutex));
+    pthread_cond_destroy(&(t_group->test_cond));
+
+    free(t_group->tid);
+    free(t_group->thread_list);
+    // The thread handles are gone; make that visible to later status queries.
+    t_group->tid = NULL;
+    t_group->thread_list = NULL;
+
+    return;
+}
+
+// Force-stop the group: request cancellation of every worker thread and
+// release the thread bookkeeping without waiting for the threads.
+// NOTE(review): a worker that has not yet reached a cancellation point may
+// still touch the mutex and its thread_aux entry after they are released
+// here; confirm callers only use this as a last resort before exiting.
+void test_group_kill(struct test_group *t_group) {
+    int ii = 0;
+    int status;
+    for (ii = 0; ii < t_group->n_threads; ++ii) {
+        // pthread_cancel returns the error number; it does not set errno.
+        status = pthread_cancel(t_group->tid[ii]);
+        if (status != 0) {
+            fprintf(stderr, "pthread_cancel failed: %s\n", strerror(status));
+            t_group->test_list[ii].status = TEST_ERROR;
+        } else {
+            // Nobody joins a killed worker, so detach it to let the system
+            // reclaim its resources once it terminates.
+            pthread_detach(t_group->tid[ii]);
+        }
+    }
+
+    pthread_attr_destroy(&(t_group->attr));
+    pthread_mutex_destroy(&(t_group->test_mutex));
+    pthread_cond_destroy(&(t_group->test_cond));
+
+    free(t_group->tid);
+    free(t_group->thread_list);
+    t_group->tid = NULL;
+    t_group->thread_list = NULL;
+
+    return;
+}
+
+// Release the test list and the group itself. Accepts NULL.
+void test_group_destroy(struct test_group *t_group) {
+    if (NULL == t_group) {
+        return;
+    }
+    free(t_group->test_list);
+    free(t_group);
+
+    return;
+}
+
+// Return the status of one test (see enum TEST_STATUS); TEST_ERROR is
+// returned for an invalid test_id.
+int test_group_test_status(struct test_group *t_group, int test_id) {
+    if (test_id < 0 || test_id >= t_group->n_threads) {
+        fprintf(stderr, "test_id: %d is larger than the number of test: %d\n", test_id, t_group->num_test);
+        // Do not index test_list or tid with an id that has no thread.
+        return TEST_ERROR;
+    }
+
+    // Signal 0 only probes whether the thread still exists. Skip the probe
+    // once the thread handles have been released.
+    if (t_group->test_list[test_id].status == TEST_RUNNING && NULL != t_group->tid) {
+        if (pthread_kill(t_group->tid[test_id], 0) == ESRCH) {
+            t_group->test_list[test_id].status = TEST_ERROR;
+        }
+    }
+
+    return t_group->test_list[test_id].status;
+}
diff --git a/src/utils/concurrent_utils.h b/src/utils/concurrent_utils.h
new file mode 100755
index 0000000..3fa0f14
--- /dev/null
+++ b/src/utils/concurrent_utils.h
@@ -0,0 +1,205 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef _CONCURRENT_UTILS_H_
+#define _CONCURRENT_UTILS_H_
+
+#include <pthread.h>
+#include <stdint.h>
+
+/**
+ * @enum TEST_STATUS
+ * @brief This enum lists status of test pthread
+ */
+enum TEST_STATUS {TEST_NOT_STARTED, TEST_RUNNING, TEST_STOP, TEST_FINISHED, TEST_ERROR};
+
+/**
+ * @struct test_aux
+ * @brief This structure holds information for a test
+ */
+struct test_aux {
+    /* Pointer to the test function; it is called with data as its argument */
+    void *fun_prt;
+    /* Pointer to the data for the test function */
+    void *data;
+    /* status of the test listed in enum TEST_STATUS */
+    uint16_t status;
+};
+
+/**
+ * @struct thread_aux
+ * @brief This structure holds the data for a test thread.
+ */
+struct thread_aux {
+    /* Thread Id */
+    int tid;
+    /* Pointer to a test item */
+    struct test_aux *test;
+    /* Pointer to the run_flag shared in the test group */
+    volatile int *run_flag;
+    /* Pointer to the exit_flag shared in the test group */
+    volatile int *exit_flag;
+    /* Pointer to the pthread mutex shared in the test group */
+    pthread_mutex_t *test_mutex;
+    /* Pointer to the pthread condition shared in the test group */
+    pthread_cond_t *test_cond;
+    /* Pointer to the number of running tests */
+    volatile unsigned int *num_running_t;
+};
+
+/**
+ * @struct test_group
+ * @brief This structure holds data for a test group
+ */
+struct test_group {
+    /* test group size, i.e., size of test_list array*/
+    size_t group_size;
+    /* number of test */
+    int num_test;
+    /* number of threads - since one test per thread, equal to num_test */
+    int n_threads;
+    /* a flag for telling all threads to run; each change of value starts one run */
+    volatile int run_flag;
+    /* a flag for telling all threads to finish - 1: exit */
+    volatile int exit_flag;
+    /* pthread tid */
+    pthread_t *tid;
+    /* pthread attr */
+    pthread_attr_t attr;
+    /* pthread mutex shared in a group */
+    pthread_mutex_t test_mutex;
+    /* pthread condition signal shared in a group */
+    pthread_cond_t test_cond;
+    /* the list of test info */
+    struct test_aux *test_list;
+    /* the list of thread info */
+    struct thread_aux *thread_list;
+    /* number of running tests */
+    volatile unsigned int num_running_t;
+};
+
+/**
+ * @brief create a test group, and preallocate
+ * test_list array with group_size
+ * @param group_size Number of entries to preallocate in test_list
+ * @return initialized struct test_group
+ */
+struct test_group* test_group_create(size_t group_size);
+
+/**
+ * @brief resize the array of test_list
+ * @param t_group Pointer to a test group
+ * @param new_group_size New number of entries in test_list
+ */
+void test_group_resize(struct test_group *t_group, size_t new_group_size);
+
+/**
+ * @brief add a new test into the specific test group
+ * @param t_group Pointer to a test group
+ * @param fun Pointer to the test function
+ * @param data Pointer to data for the test function
+ * @param num_copy Number of copies of the test
+ */
+void test_group_add(struct test_group *t_group, void *fun, void *data, size_t num_copy);
+
+/**
+ * @brief create threads for tests in a test group
+ * @param t_group Pointer to a test group
+ */
+void test_group_thread_create(struct test_group *t_group);
+
+/**
+ * @brief return the number of tests in a test group
+ * @param t_group Pointer to a test group
+ */
+int test_group_num_tests(struct test_group *t_group);
+
+/**
+ * @brief run all threads/tests in a test group
+ * @param t_group Pointer to a test group
+ */
+void test_group_start(struct test_group *t_group);
+
+/**
+ * @brief wait for all threads/tests in a test group to finish
+ * The function is blocked until all threads are finished
+ * @param t_group Pointer to a test group
+ */
+void test_group_wait(struct test_group *t_group);
+
+/**
+ * @brief terminate all threads/tests in a test group:
+ * set exit_flag to 1, wait until all threads are finished
+ * @param t_group Pointer to a test group
+ */
+void test_group_exit(struct test_group *t_group);
+
+/**
+ * @brief destroy a test group: free its test list and the group itself
+ * @param t_group Pointer to a test group
+ */
+void test_group_destroy(struct test_group *t_group);
+
+/**
+ * @brief check the status of specific test in a test group
+ * @param t_group Pointer to a test group
+ * @param test_id Test No.
+ * @return the status of the test listed in enum TEST_STATUS
+ */
+int test_group_test_status(struct test_group *t_group, int test_id);
+
+/**
+ * @brief set affinity of the specific test
+ * @param t_group Pointer to a test group
+ * @param test_id Test No.
+ * @param cpu_id CPU No. that the test is bound to
+ */
+void test_group_thread_affinity(struct test_group *t_group, int test_id, int cpu_id);
+
+/**
+ * @brief force kill a test group
+ * @param t_group Pointer to a test group
+ */
+void test_group_kill(struct test_group *t_group);
+
+#endif // _CONCURRENT_UTILS_H_
diff --git a/src/utils/dispatch_utils.c b/src/utils/dispatch_utils.c
new file mode 100644
index 0000000..212bc9f
--- /dev/null
+++ b/src/utils/dispatch_utils.c
@@ -0,0 +1,102 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include <hsa.h>
+#include <stdint.h>
+#include <string.h>
+#include "dispatch_utils.h"
+#include "framework.h"
+
+// Dispatch the kernel, and wait for the kernel to finish.
+//   queue           - queue the kernel dispatch packet is written to
+//   data_size       - size of the 1-D grid (grid_size_x); the work-group
+//                     size is fixed at 256 x 1 x 1
+//   kernel_object   - handle of the kernel code to execute
+//   kernarg_address - kernel argument buffer passed through to the packet
+void dispatch_kernel_1d_data(
+    hsa_queue_t* queue,
+    uint32_t data_size,
+    uint64_t kernel_object,
+    void* kernarg_address) {
+    hsa_status_t status;
+
+    // Create a signal with initial value of 1
+    hsa_signal_t signal;
+    status = hsa_signal_create(1, 0, NULL, &signal);
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Request a new packet ID
+    uint64_t packet_id = hsa_queue_add_write_index_acquire(queue, 1);
+
+    // If the queue is full, block.
+    while (packet_id - hsa_queue_load_read_index_relaxed(queue) >= queue->size) {}
+
+    // Compute packet offset
+    hsa_kernel_dispatch_packet_t* dispatch_packet = (hsa_kernel_dispatch_packet_t*)queue->base_address
+        + packet_id % queue->size;
+
+    // Initialize the packet. Only the fields after header and setup are
+    // cleared: zeroing the header would replace the INVALID packet type
+    // before the packet is populated. The header is written last, by the
+    // release store below.
+    uint8_t* packet_body = (uint8_t*) &dispatch_packet->workgroup_size_x;
+    memset(packet_body, 0,
+           sizeof(hsa_kernel_dispatch_packet_t) - (size_t) (packet_body - (uint8_t*) dispatch_packet));
+    dispatch_packet->completion_signal = signal;
+    dispatch_packet->setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+    dispatch_packet->workgroup_size_x = (uint16_t)256;
+    dispatch_packet->workgroup_size_y = (uint16_t)1;
+    dispatch_packet->workgroup_size_z = (uint16_t)1;
+    dispatch_packet->grid_size_x = (uint32_t)data_size;
+    dispatch_packet->grid_size_y = 1;
+    dispatch_packet->grid_size_z = 1;
+    dispatch_packet->kernel_object = kernel_object;
+    dispatch_packet->kernarg_address = (void*) kernarg_address;
+
+    // Publish the packet: the release store makes every field written
+    // above visible before the packet type changes to KERNEL_DISPATCH.
+    uint16_t header = 0;
+    header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+    header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+    header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+    __atomic_store_n((uint16_t*)(&dispatch_packet->header), header, __ATOMIC_RELEASE);
+
+    // Signal the door bell to launch the packet
+    hsa_signal_store_release(queue->doorbell_signal, packet_id);
+
+    // Wait until the kernel complete
+    while (0 != hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED)) {}
+
+    hsa_signal_destroy(signal);
+
+    return;
+}
diff --git a/src/utils/dispatch_utils.h b/src/utils/dispatch_utils.h
new file mode 100644
index 0000000..76a786c
--- /dev/null
+++ b/src/utils/dispatch_utils.h
@@ -0,0 +1,56 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro
Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#ifndef _DISPATCH_UTILS_H_ +#define _DISPATCH_UTILS_H_ +#include + +void dispatch_kernel_1d_data(hsa_queue_t* queue, + uint32_t data_size, + uint64_t kernel_object, + void* kernarg_address); + +#endif // _DISPATCH_UTILS_H_ + diff --git a/src/utils/finalize_utils.c b/src/utils/finalize_utils.c new file mode 100644 index 0000000..f3863f2 --- /dev/null +++ b/src/utils/finalize_utils.c @@ -0,0 +1,244 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "finalize_utils.h"
+// NOTE(review): the three header names below were missing in this copy of the
+// patch. They are restored from what this file uses (FILE/fopen, malloc/free,
+// memset) -- confirm against the repository.
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Jump to the enclosing function's `exit` label when _cond_ is true.
+// Wrapped in do/while(0) so the macro is a single statement and is safe
+// inside an unbraced if/else.
+#define EXIT_IF(_cond_) do { if (_cond_) { goto exit; } } while (0)
+
+// Query one attribute of _symbol_record_.symbol and store the result in the
+// record's _data_field_ member. Requires a local `status` variable and an
+// `exit` label in the calling function.
+#define GET_SYMBOL_INFO(_symbol_record_, _symbol_attribute_, _data_field_) do { \
+    status = hsa_executable_symbol_get_info(_symbol_record_.symbol, _symbol_attribute_, &(_symbol_record_._data_field_)); \
+    if (HSA_STATUS_SUCCESS != status) { goto exit; } \
+} while (0)
+
+// Fetch the version 1.00 finalizer extension function table.
+//
+// table: receives the finalizer entry points; must not be NULL.
+//
+// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when table is NULL,
+// HSA_STATUS_ERROR when the runtime reports the extension as unsupported,
+// and otherwise the status of the last runtime query. The table is only
+// filled in when HSA_STATUS_SUCCESS is returned.
+hsa_status_t get_finalization_fnc_tbl(hsa_ext_finalizer_pfn_t *table) {
+    bool support;
+    hsa_status_t status;
+    hsa_ext_finalizer_1_00_pfn_t table_1_00;
+
+    if (NULL == table) {
+        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+    }
+
+    status = hsa_system_extension_supported(HSA_EXTENSION_FINALIZER, 1, 0, &support);
+    EXIT_IF(HSA_STATUS_SUCCESS != status);
+
+    if (!support) {
+        status = HSA_STATUS_ERROR;
+        goto exit;
+    }
+
+    status = hsa_system_get_extension_table(HSA_EXTENSION_FINALIZER, 1, 0, &table_1_00);
+    EXIT_IF(HSA_STATUS_SUCCESS != status);
+
+    // Fill in the table.
+    table->hsa_ext_program_create = table_1_00.hsa_ext_program_create;
+    table->hsa_ext_program_destroy = table_1_00.hsa_ext_program_destroy;
+    table->hsa_ext_program_add_module = table_1_00.hsa_ext_program_add_module;
+    table->hsa_ext_program_iterate_modules = table_1_00.hsa_ext_program_iterate_modules;
+    table->hsa_ext_program_get_info = table_1_00.hsa_ext_program_get_info;
+    table->hsa_ext_program_finalize = table_1_00.hsa_ext_program_finalize;
+
+ exit:
+
+    return status;
+}
+
+// Read the whole of file_name into a newly malloc'ed buffer and hand the
+// buffer back through *module.
+//
+// Returns 0 on success and -1 on any failure: NULL argument, open, seek or
+// tell error, allocation failure, or short read. *module is written only on
+// success. Release a loaded module with destroy_module().
+int load_module_from_file(const char* file_name, hsa_ext_module_t* module) {
+    int rc = -1;
+    char* buf = NULL;
+    long file_len;
+    size_t file_size;
+    FILE* fp;
+
+    if (NULL == file_name || NULL == module) {
+        return -1;
+    }
+
+    fp = fopen(file_name, "rb");
+
+    // Nothing to close yet: return directly rather than reaching the
+    // fclose() at the exit label with a NULL stream.
+    if (NULL == fp) {
+        return -1;
+    }
+
+    EXIT_IF(fseek(fp, 0, SEEK_END) != 0);
+
+    // ftell() reports failure as -1; reject it before it is converted to a
+    // huge size_t allocation request.
+    file_len = ftell(fp);
+    EXIT_IF(file_len < 0);
+    file_size = (size_t) file_len;
+
+    EXIT_IF(fseek(fp, 0, SEEK_SET) != 0);
+
+    buf = (char*) malloc(file_size);
+    EXIT_IF(buf == NULL);
+
+    EXIT_IF(fread(buf, sizeof(char), file_size, fp) != file_size);
+
+    // Success is recorded only here, so every earlier exit keeps rc == -1.
+    // Ownership of the buffer passes to the caller.
+    *module = (void*) buf;
+    buf = NULL;
+    rc = 0;
+
+ exit:
+
+    // buf is non-NULL only on a failure path after allocation.
+    free(buf);
+    fclose(fp);
+
+    return rc;
+}
+
+// Release a module buffer obtained from load_module_from_file().
+// A NULL module is ignored.
+void destroy_module(hsa_ext_module_t module) {
+    char* buf = (char*) module;
+
+    if (buf) {
+        free(buf);
+    }
+
+    return;
+}
+
+// Finalize a set of BRIG modules for agent and load the result into a new,
+// frozen executable.
+//
+// agent:          agent whose ISA is targeted and on which the code is loaded.
+// module_count:   number of entries in modules.
+// modules:        modules added to the temporary program.
+// machine_model, profile, default_float_rounding_mode:
+//                 forwarded to hsa_ext_program_create.
+// code_object_type, call_convention, control_directives:
+//                 forwarded to hsa_ext_program_finalize.
+// code_object:    receives the finalized code object.
+// executable:     receives the frozen executable.
+//
+// Returns the status of the first runtime call that fails, or
+// HSA_STATUS_SUCCESS. The temporary program is destroyed before returning.
+// A code object or executable created before a later step fails is not
+// released here.
+hsa_status_t finalize_executable(hsa_agent_t agent,
+                                 uint32_t module_count,
+                                 hsa_ext_module_t *modules,
+                                 hsa_machine_model_t machine_model,
+                                 hsa_profile_t profile,
+                                 hsa_default_float_rounding_mode_t default_float_rounding_mode,
+                                 hsa_code_object_type_t code_object_type,
+                                 int32_t call_convention,
+                                 hsa_ext_control_directives_t control_directives,
+                                 hsa_code_object_t* code_object,
+                                 hsa_executable_t* executable) {
+    uint32_t i;
+    hsa_status_t status;
+    hsa_isa_t isa;
+    hsa_ext_program_t program;
+    // Tracks whether there is a program to destroy at exit.
+    bool program_created = false;
+
+    // Create the program
+    memset(&program, 0, sizeof(hsa_ext_program_t));
+    status = hsa_ext_program_create(machine_model, profile, default_float_rounding_mode, NULL, &program);
+    EXIT_IF(HSA_STATUS_SUCCESS != status);
+    program_created = true;
+
+    // Add the brig modules to the program
+    for (i = 0; i < module_count; ++i) {
+        status = hsa_ext_program_add_module(program, modules[i]);
+        EXIT_IF(HSA_STATUS_SUCCESS != status);
+    }
+
+    // Determine the agents ISA
+    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &isa);
+    EXIT_IF(HSA_STATUS_SUCCESS != status);
+
+    // Finalize the program and extract the code object
+    status = hsa_ext_program_finalize(program, isa, call_convention, control_directives, "", code_object_type, code_object);
+    EXIT_IF(HSA_STATUS_SUCCESS != status);
+
+    // Create the empty executable
+    status = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", executable);
+    EXIT_IF(HSA_STATUS_SUCCESS != status);
+
+    // Load the code object
+    status = hsa_executable_load_code_object(*executable, agent, *code_object, "");
+    EXIT_IF(HSA_STATUS_SUCCESS != status);
+
+    // Freeze the executable; it can now be queried for symbols
+    status = hsa_executable_freeze(*executable, "");
+    EXIT_IF(HSA_STATUS_SUCCESS != status);
+
+ exit:
+    // Releasing these resources should not affect the executable
+    if (program_created) {
+        hsa_ext_program_destroy(program);
+    }
+
+    return status;
+}
+
+// Look up symbol_count symbols by name in executable and record their kernel
+// attributes.
+//
+// For each i, symbol_names[i] is resolved for agent and call_convention and
+// stored in symbol_record_list[i].symbol. The kernel object, kernarg segment
+// size and alignment, and group and private segment sizes are then queried
+// into the same record. The remaining record fields are not filled in (their
+// queries are commented out below).
+//
+// Returns the status of the first runtime call that fails, or
+// HSA_STATUS_SUCCESS, including when symbol_count is 0. Records past the
+// failing index are left untouched.
+hsa_status_t get_executable_symbols(hsa_executable_t executable,
+                                    hsa_agent_t agent,
+                                    uint32_t call_convention,
+                                    uint32_t symbol_count,
+                                    char** symbol_names,
+                                    symbol_record_t* symbol_record_list) {
+    uint32_t i;
+    // Initialized so an empty request returns a defined status.
+    hsa_status_t status = HSA_STATUS_SUCCESS;
+
+    for (i = 0; i < symbol_count; ++i) {
+        status = hsa_executable_get_symbol(executable,
+                                           NULL,
+                                           symbol_names[i],
+                                           agent,
+                                           call_convention,
+                                           &(symbol_record_list[i].symbol));
+
+        EXIT_IF(HSA_STATUS_SUCCESS != status);
+
+        // Get all of the symbol's relevant information
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_TYPE, type);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, name_length);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH, module_name_length);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME, module_name)
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_AGENT, agent);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, variable_address);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE, linkage);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION, variable_allocation);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT, variable_segment);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT, variable_alignment);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, variable_size);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST, is_const);
+        GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, kernel_object);
+        GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, kernarg_segment_size);
+        GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, kernarg_segment_alignment);
+        GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, group_segment_size);
+        GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, private_segment_size);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK, dynamic_callstack);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT, indirect_function_object);
+        // GET_SYMBOL_INFO(symbol_record_list[i], HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION, indirect_function_call_convention);
+    }
+
+ exit:
+
+    return status;
+}
diff --git a/src/utils/finalize_utils.h
b/src/utils/finalize_utils.h new file mode 100644 index 0000000..e47c314 --- /dev/null +++ b/src/utils/finalize_utils.h @@ -0,0 +1,133 @@ +/* + * ============================================================================= + * HSA Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef _FINALIZE_UTILS_H_
+#define _FINALIZE_UTILS_H_
+
+// NOTE(review): the two header names below were missing in this copy of the
+// patch. They are restored from the core and finalizer-extension types used
+// in this header -- confirm against the repository.
+#include <hsa.h>
+#include <hsa_ext_finalize.h>
+
+// One executable symbol together with storage for its attributes.
+// The field names mirror the HSA_EXECUTABLE_SYMBOL_INFO_* attributes.
+typedef struct symbol_record_s {
+    hsa_executable_symbol_t symbol;
+    hsa_symbol_kind_t type;
+    uint32_t name_length;
+    char* name;
+    uint32_t module_name_length;
+    char* module_name;
+    hsa_agent_t agent;
+    uint64_t variable_address;
+    hsa_symbol_linkage_t linkage;
+    hsa_variable_allocation_t variable_allocation;
+    hsa_variable_segment_t variable_segment;
+    uint32_t variable_alignment;
+    uint32_t variable_size;
+    bool is_const;
+    uint64_t kernel_object;
+    uint32_t kernarg_segment_size;
+    uint32_t kernarg_segment_alignment;
+    uint32_t group_segment_size;
+    uint32_t private_segment_size;
+    bool dynamic_callstack;
+    uint64_t indirect_function_object;
+    uint32_t indirect_function_call_convention;
+} symbol_record_t;
+
+// Version-independent table of finalizer extension entry points.
+// Filled in by get_finalization_fnc_tbl().
+typedef struct hsa_ext_finalizer_pfn_s {
+    hsa_status_t (*hsa_ext_program_create)(hsa_machine_model_t machine_model,
+                                           hsa_profile_t profile,
+                                           hsa_default_float_rounding_mode_t default_float_rounding_mode,
+                                           const char *options,
+                                           hsa_ext_program_t *program);
+
+    hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
+
+    hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
+                                               hsa_ext_module_t module);
+
+    hsa_status_t (*hsa_ext_program_iterate_modules)(
+        hsa_ext_program_t program,
+        hsa_status_t (*callback)(hsa_ext_program_t program,
+                                 hsa_ext_module_t module, void *data),
+        void *data);
+
+    hsa_status_t (*hsa_ext_program_get_info)(hsa_ext_program_t program,
+                                             hsa_ext_program_info_t attribute,
+                                             void *value);
+
+    hsa_status_t (*hsa_ext_program_finalize)(hsa_ext_program_t program,
+                                             hsa_isa_t isa,
+                                             int32_t call_convention,
+                                             hsa_ext_control_directives_t control_directives,
+                                             const char *options,
+                                             hsa_code_object_type_t code_object_type,
+                                             hsa_code_object_t *code_object);
+} hsa_ext_finalizer_pfn_t;
+
+// Obtain the finalizer extension function table.
+hsa_status_t get_finalization_fnc_tbl(hsa_ext_finalizer_pfn_t *table);
+
+// Load the contents of a file into a newly allocated module buffer.
+// Returns 0 on success.
+int load_module_from_file(const char* file, hsa_ext_module_t* module);
+
+// Free a module buffer created by load_module_from_file().
+void destroy_module(hsa_ext_module_t module);
+
+// Finalize the given modules for agent and load the resulting code object
+// into a new executable.
+hsa_status_t finalize_executable(hsa_agent_t agent,
+                                 uint32_t module_count,
+                                 hsa_ext_module_t *modules,
+                                 hsa_machine_model_t machine_model,
+                                 hsa_profile_t profile,
+                                 hsa_default_float_rounding_mode_t default_float_rounding_mode,
+                                 hsa_code_object_type_t code_object_type,
+                                 int32_t call_convention,
+                                 hsa_ext_control_directives_t control_directives,
+                                 hsa_code_object_t* code_object,
+                                 hsa_executable_t* executable);
+
+// Resolve symbol_count named symbols of executable into symbol_record_list.
+hsa_status_t get_executable_symbols(hsa_executable_t executable,
+                                    hsa_agent_t agent,
+                                    uint32_t call_convention,
+                                    uint32_t symbol_count,
+                                    char** symbol_names,
+                                    symbol_record_t* symbol_record_list);
+
+#endif // _FINALIZE_UTILS_H_
diff --git a/src/utils/framework.h b/src/utils/framework.h
new file mode 100644
index 0000000..d98b933
--- /dev/null
+++ b/src/utils/framework.h
@@ -0,0 +1,111 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+#ifndef _FRAMEWORK_H_
+#define _FRAMEWORK_H_
+
+// NOTE(review): the two header names below were missing in this copy of the
+// patch. They are restored from usage (EXIT_SUCCESS/EXIT_FAILURE and the
+// Check API used by every macro here) -- confirm against the repository.
+#include <stdlib.h>
+#include <check.h>
+
+// Fallbacks for environments where the exit codes are not already defined.
+#ifndef EXIT_SUCCESS
+ #define EXIT_SUCCESS 0
+#endif
+
+#ifndef EXIT_FAILURE
+ #define EXIT_FAILURE -1
+#endif
+
+// Define a Check test named __test_name__ that calls test_<__test_name__>()
+// and fails unless it returns 0.
+#define DEFINE_TEST(__test_name__) \
+START_TEST(__test_name__) { \
+    int error = test_##__test_name__(); \
+    ck_assert_int_eq(error, 0); \
+} \
+END_TEST
+
+// Declare the locals (number_failed, suite, runner, test_case) used by
+// ADD_TEST and RUN_TESTS. Expands to declarations, so use it once at the top
+// of the function that runs the suite.
+#define INITIALIZE_TESTSUITE(__test_suite__) \
+    int number_failed = 0; \
+    Suite *suite = suite_create(#__test_suite__); \
+    SRunner *runner = srunner_create(suite); \
+    TCase *test_case;
+
+// Register __test_name__ in its own test case within the suite.
+#define ADD_TEST(__test_name__) \
+    test_case = tcase_create(#__test_name__); \
+    tcase_add_test(test_case, __test_name__); \
+    suite_add_tcase(suite, test_case);
+
+// Run every registered test and return from the enclosing function with
+// EXIT_SUCCESS when none failed, EXIT_FAILURE otherwise.
+#define RUN_TESTS() \
+    srunner_run_all(runner, CK_NORMAL); \
+    number_failed = srunner_ntests_failed(runner); \
+    srunner_free(runner); \
+    return(number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
+
+// Convenience Test Function Mappings
+#define ASSERT(expr) ck_assert(expr)
+#define ASSERT_MSG(expr, ...) ck_assert_msg(expr, ##__VA_ARGS__)
+#define ABORT ck_abort()
+// The arguments are forwarded as-is. A leading ## here would paste "(" onto
+// the first argument and fail to preprocess.
+#define ABORT_MSG(...) ck_abort_msg(__VA_ARGS__)
+#define ASSERT_INT_EQ(X, Y) ck_assert_int_eq(X, Y)
+#define ASSERT_INT_NE(X, Y) ck_assert_int_ne(X, Y)
+#define ASSERT_INT_LT(X, Y) ck_assert_int_lt(X, Y)
+#define ASSERT_INT_LE(X, Y) ck_assert_int_le(X, Y)
+#define ASSERT_INT_GT(X, Y) ck_assert_int_gt(X, Y)
+#define ASSERT_INT_GE(X, Y) ck_assert_int_ge(X, Y)
+#define ASSERT_UINT_EQ(X, Y) ck_assert_uint_eq(X, Y)
+#define ASSERT_UINT_NE(X, Y) ck_assert_uint_ne(X, Y)
+#define ASSERT_UINT_LT(X, Y) ck_assert_uint_lt(X, Y)
+#define ASSERT_UINT_LE(X, Y) ck_assert_uint_le(X, Y)
+#define ASSERT_UINT_GT(X, Y) ck_assert_uint_gt(X, Y)
+#define ASSERT_UINT_GE(X, Y) ck_assert_uint_ge(X, Y)
+#define ASSERT_STR_EQ(X, Y) ck_assert_str_eq(X, Y)
+#define ASSERT_STR_NE(X, Y) ck_assert_str_ne(X, Y)
+#define ASSERT_STR_LT(X, Y) ck_assert_str_lt(X, Y)
+#define ASSERT_STR_LE(X, Y) ck_assert_str_le(X, Y)
+#define ASSERT_STR_GT(X, Y) ck_assert_str_gt(X, Y)
+#define ASSERT_STR_GE(X, Y) ck_assert_str_ge(X, Y)
+#define ASSERT_PTR_EQ(X, Y) ck_assert_ptr_eq(X, Y)
+#define ASSERT_PTR_NE(X, Y) ck_assert_ptr_ne(X, Y)
+#define MARK_SRC_POINT() mark_point()
+
+#endif // _FRAMEWORK_H_
diff --git a/src/utils/image_utils.c b/src/utils/image_utils.c
new file mode 100644
index 0000000..b5fc8d4
--- /dev/null
+++ b/src/utils/image_utils.c
@@ -0,0 +1,278 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+// NOTE(review): the five header names below were missing in this copy of the
+// patch. They are restored from what this file uses (NULL, HSA core and image
+// extension types, the ASSERT macros, hsa_ext_image_pfn_t) -- confirm names
+// and order against the repository.
+#include <stddef.h>
+#include <hsa.h>
+#include <hsa_ext_image.h>
+#include <framework.h>
+#include <image_utils.h>
+
+// Names of the image-region verification kernels, one table per geometry.
+// Each table is indexed by get_kernel_index(): 0 = s32, 1 = u32, 2 = f32.
+static char* VERIFY_IMAGE_REGION_KERNEL_1D[3] = {
+    "&__verify_image_region_kernel_s32_1d",
+    "&__verify_image_region_kernel_u32_1d",
+    "&__verify_image_region_kernel_f32_1d"};
+static char* VERIFY_IMAGE_REGION_KERNEL_1DA[3] = {
+    "&__verify_image_region_kernel_s32_1da",
+    "&__verify_image_region_kernel_u32_1da",
+    "&__verify_image_region_kernel_f32_1da"};
+static char* VERIFY_IMAGE_REGION_KERNEL_1DB[3] = {
+    "&__verify_image_region_kernel_s32_1db",
+    "&__verify_image_region_kernel_u32_1db",
+    "&__verify_image_region_kernel_f32_1db"};
+static char* VERIFY_IMAGE_REGION_KERNEL_2D[3] = {
+    "&__verify_image_region_kernel_s32_2d",
+    "&__verify_image_region_kernel_u32_2d",
+    "&__verify_image_region_kernel_f32_2d"};
+static char* VERIFY_IMAGE_REGION_KERNEL_2DA[3] = {
+    "&__verify_image_region_kernel_s32_2da",
+    "&__verify_image_region_kernel_u32_2da",
+    "&__verify_image_region_kernel_f32_2da"};
+static char* VERIFY_IMAGE_REGION_KERNEL_2DDEPTH[3] = {
+    "&__verify_image_region_kernel_s32_2ddepth",
+    "&__verify_image_region_kernel_u32_2ddepth",
+    "&__verify_image_region_kernel_f32_2ddepth"};
+static char* VERIFY_IMAGE_REGION_KERNEL_2DADEPTH[3] = {
+    "&__verify_image_region_kernel_s32_2dadepth",
+    "&__verify_image_region_kernel_u32_2dadepth",
+    "&__verify_image_region_kernel_f32_2dadepth"};
+static char* VERIFY_IMAGE_REGION_KERNEL_3D[3] = {
+    "&__verify_image_region_kernel_s32_3d",
+    "&__verify_image_region_kernel_u32_3d",
+    "&__verify_image_region_kernel_f32_3d"};
+
+// Returns the number of bits that are used to represent the scale of the pixel.
+// Channel types that are not listed yield 0.
+uint32_t get_channel_type_bits(hsa_ext_image_channel_type_t channel_type) {
+    switch (channel_type) {
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 : { return 5; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 : { return 5; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 : { return 7; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 : { return 8; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 : { return 8; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 : { return 7; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 : { return 10; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 : { return 15; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 : { return 16; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT : { return 16; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 : { return 15; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 : { return 24; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 : { return 31; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 : { return 16; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT : { return 21; }
+        case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 : { return 32; }
+        default : { return 0; }
+    }
+
+    return 0;
+}
+
+// Select the entry of a VERIFY_IMAGE_REGION_KERNEL_* table for channel_type:
+// 0 for the signed integer types, 1 for the unsigned integer types and 2 for
+// everything else.
+int get_kernel_index(hsa_ext_image_channel_type_t channel_type) {
+    if (HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 == channel_type ||
+        HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 == channel_type ||
+        HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 == channel_type) {
+        return 0;
+    } else if (HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 == channel_type ||
+               HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 == channel_type ||
+               HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 == channel_type) {
+        return 1;
+    }
+
+    return 2;
+}
+
+// Obtain the image extension function table
+//
+// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when table is NULL,
+// HSA_STATUS_ERROR when the runtime reports the extension as unsupported,
+// and otherwise the status of the last runtime query. The table is only
+// filled in when HSA_STATUS_SUCCESS is returned.
+hsa_status_t get_image_fnc_tbl(hsa_ext_image_pfn_t* table) {
+    bool support;
+    hsa_status_t status;
+    hsa_ext_images_1_00_pfn_t table_1_00;
+
+    if (NULL == table) {
+        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+    }
+
+    status = hsa_system_extension_supported(HSA_EXTENSION_IMAGES, 1, 0, &support);
+
+    if (HSA_STATUS_SUCCESS != status) {
+        goto exit;
+    }
+
+    if (!support) {
+        status = HSA_STATUS_ERROR;
+        goto exit;
+    }
+
+    status = hsa_system_get_extension_table(HSA_EXTENSION_IMAGES, 1, 0, &table_1_00);
+
+    if (HSA_STATUS_SUCCESS != status) {
+        goto exit;
+    }
+
+    // Fill in the table.
+    table->hsa_ext_image_get_capability = table_1_00.hsa_ext_image_get_capability;
+    table->hsa_ext_image_data_get_info = table_1_00.hsa_ext_image_data_get_info;
+    table->hsa_ext_image_create = table_1_00.hsa_ext_image_create;
+    table->hsa_ext_image_destroy = table_1_00.hsa_ext_image_destroy;
+    table->hsa_ext_image_copy = table_1_00.hsa_ext_image_copy;
+    table->hsa_ext_image_import = table_1_00.hsa_ext_image_import;
+    table->hsa_ext_image_export = table_1_00.hsa_ext_image_export;
+    table->hsa_ext_image_clear = table_1_00.hsa_ext_image_clear;
+    table->hsa_ext_sampler_create = table_1_00.hsa_ext_sampler_create;
+    table->hsa_ext_sampler_destroy = table_1_00.hsa_ext_sampler_destroy;
+
+ exit:
+
+    return status;
+}
+
+// Get the geometric information of the image
+//
+// agent:             agent queried for the maximum image size.
+// format:            image format; its channel_type selects the kernel name.
+// geometry:          image geometry being described.
+// image_dimension:   receives 1, 2 or 3.
+// max_elements:      receives the agent's per-dimension maximum for this
+//                    geometry. Must have room for three values: entries past
+//                    image_dimension are set to 1.
+// validation_kernel: receives the name of the matching verification kernel.
+//
+// Asserts if the agent query fails or if geometry is not one of the handled
+// values.
+void get_geometry_info(hsa_agent_t agent,
+                       hsa_ext_image_format_t* format,
+                       hsa_ext_image_geometry_t geometry,
+                       int* image_dimension,
+                       uint32_t* max_elements,
+                       char** validation_kernel) {
+    hsa_agent_info_t max_elements_attribute;
+    char** kernel_names;
+
+    switch (geometry) {
+        case HSA_EXT_IMAGE_GEOMETRY_1D:
+            *image_dimension = 1;
+            max_elements_attribute = HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS;
+            kernel_names = VERIFY_IMAGE_REGION_KERNEL_1D;
+            break;
+        case HSA_EXT_IMAGE_GEOMETRY_1DA:
+            *image_dimension = 1;
+            max_elements_attribute = HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS;
+            kernel_names = VERIFY_IMAGE_REGION_KERNEL_1DA;
+            break;
+        case HSA_EXT_IMAGE_GEOMETRY_1DB:
+            *image_dimension = 1;
+            max_elements_attribute = HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS;
+            kernel_names = VERIFY_IMAGE_REGION_KERNEL_1DB;
+            break;
+        case HSA_EXT_IMAGE_GEOMETRY_2D:
+            *image_dimension = 2;
+            max_elements_attribute = HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS;
+            kernel_names = VERIFY_IMAGE_REGION_KERNEL_2D;
+            break;
+        case HSA_EXT_IMAGE_GEOMETRY_2DA:
+            *image_dimension = 2;
+            max_elements_attribute = HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS;
+            kernel_names = VERIFY_IMAGE_REGION_KERNEL_2DA;
+            break;
+        case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
+            *image_dimension = 2;
+            max_elements_attribute = HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS;
+            kernel_names = VERIFY_IMAGE_REGION_KERNEL_2DDEPTH;
+            break;
+        case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
+            *image_dimension = 2;
+            max_elements_attribute = HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS;
+            kernel_names = VERIFY_IMAGE_REGION_KERNEL_2DADEPTH;
+            break;
+        case HSA_EXT_IMAGE_GEOMETRY_3D:
+            *image_dimension = 3;
+            max_elements_attribute = HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS;
+            kernel_names = VERIFY_IMAGE_REGION_KERNEL_3D;
+            break;
+        default:
+            // No attribute or kernel table exists for this geometry. Give the
+            // outputs defined values and fail the test instead of querying
+            // the agent with an uninitialized attribute.
+            *image_dimension = 0;
+            *validation_kernel = NULL;
+            max_elements[0] = 0;
+            max_elements[1] = 0;
+            max_elements[2] = 0;
+            ASSERT_MSG(0, "Unsupported image geometry %d", (int) geometry);
+            return;
+    }
+
+    *validation_kernel = kernel_names[get_kernel_index(format->channel_type)];
+
+    hsa_status_t status = hsa_agent_get_info(agent,
+                                             max_elements_attribute,
+                                             max_elements);
+
+    ASSERT(HSA_STATUS_SUCCESS == status);
+
+    // Dimensions the geometry does not have are reported as a single element.
+    max_elements[1] = (2 <= *image_dimension) ? max_elements[1] : 1;
+    max_elements[2] = (3 <= *image_dimension) ? max_elements[2] : 1;
+
+    return;
+}
+
+// Fill in the comparison data for the compare buffer
+//
+// The result holds one hex digit per channel in R, G, B, A order, most
+// significant digit first. A digit is 1 for the channels listed per order
+// below; unknown orders yield 0x0000.
+// NOTE(review): a 1 presumably marks a channel that takes part in the pixel
+// comparison -- confirm with the callers.
+uint32_t get_cmp_info(hsa_ext_image_channel_order_t order) {
+    switch (order) {
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_A:
+            return 0x0001;
+
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_R:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_RX:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL:
+            return 0x1000;
+
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_RG:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_RGX:
+            return 0x1100;
+
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_RA:
+            return 0x1001;
+
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_RGB:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
+            return 0x1110;
+
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
+        case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA:
+            return 0x1111;
+
+        default:
+            break;
+    }
+
+    return 0x0000;
+}
diff --git a/src/utils/image_utils.h b/src/utils/image_utils.h
new file mode 100644
index 0000000..f70854c
--- /dev/null
+++ b/src/utils/image_utils.h
@@ -0,0 +1,116 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c)
2014, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD HSA Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+
+#ifndef _IMAGE_UTILS_H_
+#define _IMAGE_UTILS_H_
+
+// NOTE(review): the two header names below were missing in this copy of the
+// patch. They are restored from the core and image-extension types used in
+// this header -- confirm against the repository.
+#include <hsa.h>
+#include <hsa_ext_image.h>
+
+// Version-independent table of image extension entry points.
+// Filled in by get_image_fnc_tbl().
+typedef struct hsa_ext_image_pfn_s {
+    hsa_status_t (*hsa_ext_image_get_capability)(
+        hsa_agent_t agent, hsa_ext_image_geometry_t geometry,
+        const hsa_ext_image_format_t *image_format, uint32_t *capability_mask);
+
+    hsa_status_t (*hsa_ext_image_data_get_info)(
+        hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor,
+        hsa_access_permission_t access_permission,
+        hsa_ext_image_data_info_t *image_data_info);
+
+    hsa_status_t (*hsa_ext_image_create)(
+        hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor,
+        const void *image_data, hsa_access_permission_t access_permission,
+        hsa_ext_image_t *image);
+
+    hsa_status_t (*hsa_ext_image_destroy)(hsa_agent_t agent,
+                                          hsa_ext_image_t image);
+
+    hsa_status_t (*hsa_ext_image_copy)(hsa_agent_t agent,
+                                       hsa_ext_image_t src_image,
+                                       const hsa_dim3_t *src_offset,
+                                       hsa_ext_image_t dst_image,
+                                       const hsa_dim3_t *dst_offset,
+                                       const hsa_dim3_t *range);
+
+    hsa_status_t (*hsa_ext_image_import)(
+        hsa_agent_t agent, const void *src_memory, size_t src_row_pitch,
+        size_t src_slice_pitch, hsa_ext_image_t dst_image,
+        const hsa_ext_image_region_t *image_region);
+
+    hsa_status_t (*hsa_ext_image_export)(
+        hsa_agent_t agent, hsa_ext_image_t src_image, void *dst_memory,
+        size_t dst_row_pitch, size_t dst_slice_pitch,
+        const hsa_ext_image_region_t *image_region);
+
+    hsa_status_t (*hsa_ext_image_clear)(
+        hsa_agent_t agent, hsa_ext_image_t image, const void *data,
+        const hsa_ext_image_region_t *image_region);
+
+    hsa_status_t (*hsa_ext_sampler_create)(
+        hsa_agent_t agent, const hsa_ext_sampler_descriptor_t *sampler_descriptor,
+        hsa_ext_sampler_t *sampler);
+
+    hsa_status_t (*hsa_ext_sampler_destroy)(hsa_agent_t agent,
+                                            hsa_ext_sampler_t sampler);
+} hsa_ext_image_pfn_t;
+
+// Obtain the image extension function table
+hsa_status_t get_image_fnc_tbl(hsa_ext_image_pfn_t* table);
+
+// Get information about a specific image geometry on the target agent.
+// max_elements must have room for three values.
+void get_geometry_info(hsa_agent_t agent,
+                       hsa_ext_image_format_t* format,
+                       hsa_ext_image_geometry_t geometry,
+                       int* image_dimension,
+                       uint32_t* max_elements,
+                       char** validation_kernel);
+
+// Get information about pixel comparison operations.
+uint32_t get_cmp_info(hsa_ext_image_channel_order_t order);
+
+// Get the number of bits used for the specified channel type.
+uint32_t get_channel_type_bits(hsa_ext_image_channel_type_t channel_type);
+
+#endif // _IMAGE_UTILS_H_
diff --git a/src/utils/queue_utils.c b/src/utils/queue_utils.c
new file mode 100644
index 0000000..8a93517
--- /dev/null
+++ b/src/utils/queue_utils.c
@@ -0,0 +1,138 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include <string.h>
+#include "queue_utils.h"
+
+// Reserves the next write index of the queue and enqueues the packet at that
+// index via enqueue_dispatch_packet_at(), which may spin until a slot is free.
+void enqueue_dispatch_packet(hsa_queue_t *queue,
+                             hsa_kernel_dispatch_packet_t *packet) {
+    // Reserve the write_index for the packet
+    uint64_t write_index = hsa_queue_add_write_index_relaxed(queue, 1);
+
+    // Enqueue the packet at the location
+    enqueue_dispatch_packet_at(write_index, queue, packet);
+}
+
+// Copies the packet into the ring-buffer slot for write_index (an index the caller
+// has already reserved), spinning while the queue is full, then rings the doorbell.
+void enqueue_dispatch_packet_at(uint64_t write_index,
+                                hsa_queue_t *queue,
+                                hsa_kernel_dispatch_packet_t *packet) {
+    // Block while the queue is full; at delta == size the slot still holds an unconsumed packet
+    uint64_t delta;
+    do {
+        delta = write_index - hsa_queue_load_read_index_relaxed(queue);
+    } while (delta >= queue->size);
+
+    const uint32_t queue_mask = queue->size - 1;
+
+    hsa_kernel_dispatch_packet_t* packet_base
+        = (hsa_kernel_dispatch_packet_t*) &((hsa_kernel_dispatch_packet_t*)(queue->base_address))[write_index&queue_mask];
+
+    // Copy everything after the header; the header itself is stored last
+    memcpy(&packet_base->setup, &packet->setup, sizeof(hsa_kernel_dispatch_packet_t) - sizeof(packet_base->header));
+
+    // Atomically set the packet header (release store, ordered after the body copy above)
+    __atomic_store_n((uint16_t*) packet_base, packet->header, __ATOMIC_RELEASE);
+
+    // Ring the doorbell.
+    hsa_signal_store_relaxed(queue->doorbell_signal, write_index);
+}
+
+// Reserves packet_count consecutive write indexes and enqueues the packets
+// starting at the first reserved index. Does nothing when packet_count is 0.
+void enqueue_dispatch_packets(hsa_queue_t *queue,
+                              uint32_t packet_count,
+                              hsa_kernel_dispatch_packet_t packet[]) {
+    if (packet_count > 0) {
+        // Reserve the write indexes for all of the packets
+        uint64_t write_index = hsa_queue_add_write_index_relaxed(queue, packet_count);
+
+        // Dispatch the packets
+        enqueue_dispatch_packets_at(write_index, queue, packet_count, packet);
+    }
+}
+
+// Copies packet_count packets into consecutive slots starting at write_index
+// (indexes the caller has already reserved). Packets are submitted in batches of at
+// most queue->size; each batch waits for free slots and then rings the doorbell.
+void enqueue_dispatch_packets_at(uint64_t write_index,
+                                 hsa_queue_t *queue,
+                                 uint32_t packet_count,
+                                 hsa_kernel_dispatch_packet_t packet[]) {
+    while (packet_count > 0) {
+        uint32_t dispatch_count = (packet_count > queue->size) ? queue->size : packet_count;
+
+        // Block until the queue has dispatch_count empty packet slots
+        uint64_t delta;
+        do {
+            delta = write_index + dispatch_count - hsa_queue_load_read_index_relaxed(queue);
+        } while (delta > queue->size);
+
+        const uint32_t queue_mask = queue->size - 1;
+
+        for (uint32_t i = 0; i < dispatch_count; ++i) {
+            hsa_kernel_dispatch_packet_t* packet_base
+                = (hsa_kernel_dispatch_packet_t*) &((hsa_kernel_dispatch_packet_t*)(queue->base_address))[write_index&queue_mask];
+
+            // Copy everything after the header; the header itself is stored last
+            memcpy(&packet_base->setup, &packet[i].setup, sizeof(hsa_kernel_dispatch_packet_t) - sizeof(packet_base->header));
+
+            // Atomically set the packet header (release store, ordered after the body copy above)
+            __atomic_store_n((uint16_t*) packet_base, packet[i].header, __ATOMIC_RELEASE);
+
+            ++write_index;
+        }
+
+        // Ring the doorbell with the ID of the last packet written (write_index is now one past it)
+        hsa_signal_store_relaxed(queue->doorbell_signal, write_index - 1);
+
+        // Advance past the submitted packets so the next batch continues after them
+        packet += dispatch_count;
+        packet_count -= dispatch_count;
+    }
+}
diff --git a/src/utils/queue_utils.h b/src/utils/queue_utils.h
new file mode 100644
index 0000000..7082d75
--- /dev/null
+++ b/src/utils/queue_utils.h
@@ -0,0 +1,59 @@
+/*
+ * =============================================================================
+ * HSA Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2014, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * AMD Research and AMD HSA Software Development
+ *
+ * Advanced Micro Devices, Inc.
+ *
+ * www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the names of ,
+ * nor the names of its contributors may be used to endorse or promote
+ * products derived from this Software without specific prior written
+ * permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef _QUEUE_UTILS_H_
+#define _QUEUE_UTILS_H_
+// NOTE(review): the header name of the #include below is missing; presumably the HSA runtime header that declares hsa_queue_t and hsa_kernel_dispatch_packet_t. Restore and confirm.
+#include
+// Reserves the next write index of `queue` and enqueues `packet` at it (see enqueue_dispatch_packet_at).
+void enqueue_dispatch_packet(hsa_queue_t *queue, hsa_kernel_dispatch_packet_t *packet);
+// Writes `packet` into the queue slot for the already-reserved `write_index`, busy-waiting for space, then rings the queue's doorbell signal.
+void enqueue_dispatch_packet_at(uint64_t write_index, hsa_queue_t *queue, hsa_kernel_dispatch_packet_t *packet);
+// Reserves `packet_count` consecutive write indexes and enqueues packets from `packet` starting there; does nothing when packet_count is 0.
+void enqueue_dispatch_packets(hsa_queue_t *queue, uint32_t packet_count, hsa_kernel_dispatch_packet_t packet[]);
+// Writes packets from `packet` into consecutive slots starting at the already-reserved `write_index`, in batches of at most queue->size, ringing the doorbell after each batch.
+void enqueue_dispatch_packets_at(uint64_t write_index, hsa_queue_t *queue, uint32_t packet_count, hsa_kernel_dispatch_packet_t packet[]);
+
+#endif // _QUEUE_UTILS_H_