diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8aabaf94c1..778f180595 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -4,7 +4,7 @@ jobs:
   build_docker:
     strategy:
       matrix:
-        target: [gcc12, gcc13, clang13, clang15, rocm5.6, rocm5.6_desul, intel2024, intel2024_debug, intel2024_sycl]
+        target: [gcc12_debug, gcc13, clang13, clang15, rocm5.6, rocm5.6_desul, intel2024, intel2024_debug, intel2024_sycl]
     runs-on: ubuntu-latest
     steps:
     - run: |
diff --git a/Dockerfile b/Dockerfile
index 4e224371eb..66a50c3794 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,22 +17,24 @@ COPY . /home/raja/workspace
 WORKDIR /home/raja/workspace/build
 RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \
     make -j 6 &&\
-    ctest -T test --output-on-failure
+    ctest -T test --output-on-failure && \
+    make clean
 
 FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-22.04 AS gcc12
 ENV GTEST_COLOR=1
 COPY . /home/raja/workspace
 WORKDIR /home/raja/workspace/build
 RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \
-    make -j 16 &&\
-    ctest -T test --output-on-failure
+    make -j 6 &&\
+    ctest -T test --output-on-failure && \
+    make clean
 
 FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-22.04 AS gcc12_debug
 ENV GTEST_COLOR=1
 COPY . /home/raja/workspace
 WORKDIR /home/raja/workspace/build
 RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \
-    make -j 6 &&\
+    make -j 16 &&\
     ctest -T test --output-on-failure
 
 FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-22.04 AS gcc12_desul
@@ -41,7 +43,8 @@ COPY . /home/raja/workspace
 WORKDIR /home/raja/workspace/build
 RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On -DRAJA_ENABLE_DESUL_ATOMICS=On .. && \
     make -j 6 &&\
-    ctest -T test --output-on-failure
+    ctest -T test --output-on-failure && \
+    make clean
 
 FROM ghcr.io/llnl/radiuss:gcc-13-ubuntu-22.04 AS gcc13
 ENV GTEST_COLOR=1
@@ -65,7 +68,8 @@ COPY . /home/raja/workspace
 WORKDIR /home/raja/workspace/build
 RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Debug  -DENABLE_OPENMP=On .. && \
     make -j 6 &&\
-    ctest -T test --output-on-failure
+    ctest -T test --output-on-failure && \
+    make clean
 
 FROM ghcr.io/llnl/radiuss:clang-15-ubuntu-22.04 AS clang15
 ENV GTEST_COLOR=1
@@ -81,7 +85,8 @@ COPY . /home/raja/workspace
 WORKDIR /home/raja/workspace/build
 RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=On -DRAJA_ENABLE_DESUL_ATOMICS=On .. && \
     make -j 6 &&\
-    ctest -T test --output-on-failure
+    ctest -T test --output-on-failure && \
+    make clean
 
 ## Test run failure in RAJA launch tests with new reducer interface.
 ## Need to figure out best way to handle that.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 4535138152..e78ed7a10b 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -4,8 +4,10 @@ jobs:
     matrix:
       shared:
         SHARED_ARGS: '-DBUILD_SHARED_LIBS=On'
-      static:
-        SHARED_ARGS: '-DBUILD_SHARED_LIBS=Off'
+###########################
+## The Windows static build is run on GitHub Actions, so it is disabled here:
+##    static:
+##      SHARED_ARGS: '-DBUILD_SHARED_LIBS=Off'
   pool:
     vmImage: 'windows-2019'
   variables:
@@ -36,8 +38,8 @@ jobs:
     matrix: 
       gcc11:
         docker_target: gcc11
-      gcc12_debug:
-        docker_target: gcc12_debug
+      gcc12:
+        docker_target: gcc12
       gcc12_desul:
         docker_target: gcc12_desul
       clang14_debug:
diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp
index 0db1dc4e0d..75e5f6902b 100644
--- a/include/RAJA/policy/cuda/launch.hpp
+++ b/include/RAJA/policy/cuda/launch.hpp
@@ -209,7 +209,8 @@ void launch_global_fcn_fixed(BODY body_in)
 }
 
 template <typename BODY, int num_threads, size_t BLOCKS_PER_SM, typename ReduceParams>
-__global__ void launch_new_reduce_global_fcn_fixed(BODY body_in, ReduceParams reduce_params)
+__launch_bounds__(num_threads, BLOCKS_PER_SM) __global__
+void launch_new_reduce_global_fcn_fixed(BODY body_in, ReduceParams reduce_params)
 {
   LaunchContext ctx;
 
@@ -298,7 +299,7 @@ struct LaunchExecute<RAJA::policy::cuda::cuda_launch_explicit_t<async, nthreads,
     using BODY = camp::decay<BODY_IN>;
 
     auto func = reinterpret_cast<const void*>(
-        &launch_new_reduce_global_fcn<BODY, camp::decay<ReduceParams>>);
+        &launch_new_reduce_global_fcn_fixed<BODY, nthreads, BLOCKS_PER_SM, camp::decay<ReduceParams>>);
 
     resources::Cuda cuda_res = res.get<RAJA::resources::Cuda>();