diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9b31cbe124..3eb0dbc8d2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -171,6 +171,7 @@ endif()
 
 if(RAJA_ENABLE_HIP)
   message(STATUS "HIP version: ${hip_VERSION}")
+  set(RAJA_HIP_WAVESIZE "64" CACHE STRING "Set the wave size for GPU architecture. E.g. MI200/MI300 this is 64.")
   if("${hip_VERSION}" VERSION_LESS "3.5")
     message(FATAL_ERROR "Trying to use HIP/ROCm version ${hip_VERSION}. RAJA requires HIP/ROCm version 3.5 or newer. ")
   endif()
diff --git a/examples/dynamic-forall.cpp b/examples/dynamic-forall.cpp
index 5131010bd6..d405408e7f 100644
--- a/examples/dynamic-forall.cpp
+++ b/examples/dynamic-forall.cpp
@@ -77,28 +77,25 @@ int main(int argc, char *argv[])
 
 //----------------------------------------------------------------------------//
 
-  std::cout << "\n Running C-style vector addition...\n";
-
-  // _cstyle_vector_add_start
-  for (int i = 0; i < N; ++i) {
-    c[i] = a[i] + b[i];
-  }
-  // _cstyle_vector_add_end
-
-  checkResult(c, N);
-//printResult(c, N);
-
-
-//----------------------------------------------------------------------------//
-// Example of dynamic policy selection for forall
-//----------------------------------------------------------------------------//
+  std::cout << "\n Running dynamic forall vector addition and reductions...\n";
 
+  int sum = 0;
+  using VAL_INT_SUM = RAJA::expt::ValOp<int, RAJA::operators::plus>;
+  
+  RAJA::RangeSegment range(0, N); 
+  
   //policy is chosen from the list
-  RAJA::expt::dynamic_forall<policy_list>(pol, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE (int i)   {
+  RAJA::dynamic_forall<policy_list>(pol, range,
+    RAJA::expt::Reduce<RAJA::operators::plus>(&sum),
+      RAJA::expt::KernelName("RAJA dynamic forall"),
+      [=] RAJA_HOST_DEVICE (int i, VAL_INT_SUM &_sum) {
+      
       c[i] = a[i] + b[i];
+      _sum += 1;
   });
   // _rajaseq_vector_add_end
 
+  std::cout << "Sum = " << sum << ", expected sum: " << N << std::endl;
   checkResult(c, N);
 //printResult(c, N);
 
@@ -126,9 +123,9 @@ void checkResult(int* res, int len)
     if ( res[i] != 0 ) { correct = false; }
   }
   if ( correct ) {
-    std::cout << "\n\t result -- PASS\n";
+    std::cout << "\n\t Vector sum result -- PASS\n";
   } else {
-    std::cout << "\n\t result -- FAIL\n";
+    std::cout << "\n\t Vector sum result -- FAIL\n";
   }
 }
 
diff --git a/examples/memoryManager.hpp b/examples/memoryManager.hpp
index 62d3d6e3e7..685a572033 100644
--- a/examples/memoryManager.hpp
+++ b/examples/memoryManager.hpp
@@ -76,7 +76,7 @@ void deallocate(T *&ptr)
     hipErrchk(hipMalloc((void **)&ptr, sizeof(T) * size));
 #elif defined(RAJA_ENABLE_SYCL)
       auto qu = sycl_res->get<camp::resources::Sycl>().get_queue();
-      ptr = cl::sycl::malloc_device<T>(size, *qu);
+      ptr = ::sycl::malloc_device<T>(size, *qu);
 #endif
     return ptr;
   }
diff --git a/examples/resource-dynamic-forall.cpp b/examples/resource-dynamic-forall.cpp
index 0b35017fac..ac6fd62398 100644
--- a/examples/resource-dynamic-forall.cpp
+++ b/examples/resource-dynamic-forall.cpp
@@ -121,7 +121,7 @@ int main(int argc, char *argv[])
   RAJA::resources::Resource res = RAJA::Get_Host_Resource(host_res, select_cpu_or_gpu);
 #endif
 
-  RAJA::expt::dynamic_forall<policy_list>
+  RAJA::dynamic_forall<policy_list>
   (res, pol, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE (int i)   {
 
     c[i] = a[i] + b[i];
diff --git a/exercises/memoryManager.hpp b/exercises/memoryManager.hpp
index 62d3d6e3e7..685a572033 100644
--- a/exercises/memoryManager.hpp
+++ b/exercises/memoryManager.hpp
@@ -76,7 +76,7 @@ void deallocate(T *&ptr)
     hipErrchk(hipMalloc((void **)&ptr, sizeof(T) * size));
 #elif defined(RAJA_ENABLE_SYCL)
       auto qu = sycl_res->get<camp::resources::Sycl>().get_queue();
-      ptr = cl::sycl::malloc_device<T>(size, *qu);
+      ptr = ::sycl::malloc_device<T>(size, *qu);
 #endif
     return ptr;
   }
diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index 380418efa1..29d97fed69 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -182,6 +182,8 @@ static_assert(RAJA_HAS_SOME_CXX14,
 #cmakedefine RAJA_ENABLE_NV_TOOLS_EXT
 #cmakedefine RAJA_ENABLE_ROCTX
 
+#cmakedefine RAJA_HIP_WAVESIZE @RAJA_HIP_WAVESIZE@
+
 /*!
  ******************************************************************************
  *
diff --git a/include/RAJA/pattern/forall.hpp b/include/RAJA/pattern/forall.hpp
index 686f0e8c6b..e75cc43af7 100644
--- a/include/RAJA/pattern/forall.hpp
+++ b/include/RAJA/pattern/forall.hpp
@@ -647,104 +647,99 @@ RAJA_INLINE camp::resources::EventProxy<Res> CallForallIcount::operator()(T cons
 // - Returns a generic event proxy only if a resource is provided
 //   avoids overhead of constructing a typed erased resource
 //
-namespace expt
+template<camp::idx_t IDX, typename POLICY_LIST>
+struct dynamic_helper
 {
-
-  template<camp::idx_t IDX, typename POLICY_LIST>
-  struct dynamic_helper
+  template<typename SEGMENT, typename... PARAMS>
+  static void invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params)
   {
-    template<typename SEGMENT, typename BODY>
-    static void invoke_forall(const int pol, SEGMENT const &seg, BODY const &body)
-    {
-      if(IDX==pol){
-        using t_pol = typename camp::at<POLICY_LIST,camp::num<IDX>>::type;
-        RAJA::forall<t_pol>(seg, body);
-        return;
-      }
-      dynamic_helper<IDX-1, POLICY_LIST>::invoke_forall(pol, seg, body);
-    }
-
-    template<typename SEGMENT, typename BODY>
-    static resources::EventProxy<resources::Resource>
-    invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body)
-    {
-
+    if(IDX==pol){
       using t_pol = typename camp::at<POLICY_LIST,camp::num<IDX>>::type;
-      using resource_type = typename resources::get_resource<t_pol>::type;
-
-      if(IDX==pol){
-        RAJA::forall<t_pol>(r.get<resource_type>(), seg, body);
-
-        //Return a generic event proxy from r,
-        //because forall returns a typed event proxy
-        return {r};
-      }
-
-      return dynamic_helper<IDX-1, POLICY_LIST>::invoke_forall(r, pol, seg, body);
+      RAJA::forall<t_pol>(seg, params...);
+      return;
     }
+    dynamic_helper<IDX-1, POLICY_LIST>::invoke_forall(pol, seg, params...);
+  }
 
-  };
-
-  template<typename POLICY_LIST>
-  struct dynamic_helper<0, POLICY_LIST>
+  template<typename SEGMENT, typename... PARAMS>
+  static resources::EventProxy<resources::Resource>
+  invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params)
   {
-    template<typename SEGMENT, typename BODY>
-    static void
-    invoke_forall(const int pol, SEGMENT const &seg, BODY const &body)
-    {
-      if(0==pol){
-        using t_pol = typename camp::at<POLICY_LIST,camp::num<0>>::type;
-        RAJA::forall<t_pol>(seg, body);
-        return;
-      }
-      RAJA_ABORT_OR_THROW("Policy enum not supported ");
-    }
 
-    template<typename SEGMENT, typename BODY>
-    static resources::EventProxy<resources::Resource>
-    invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body)
-    {
-      if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range ");
-
-      using t_pol = typename camp::at<POLICY_LIST,camp::num<0>>::type;
-      using resource_type = typename resources::get_resource<t_pol>::type;
+    using t_pol = typename camp::at<POLICY_LIST,camp::num<IDX>>::type;
+    using resource_type = typename resources::get_resource<t_pol>::type;
 
-      RAJA::forall<t_pol>(r.get<resource_type>(), seg, body);
+    if(IDX==pol){
+      RAJA::forall<t_pol>(r.get<resource_type>(), seg, params...);
 
       //Return a generic event proxy from r,
       //because forall returns a typed event proxy
       return {r};
     }
 
-  };
+    return dynamic_helper<IDX-1, POLICY_LIST>::invoke_forall(r, pol, seg, params...);
+  }
 
-  template<typename POLICY_LIST, typename SEGMENT, typename BODY>
-  void dynamic_forall(const int pol, SEGMENT const &seg, BODY const &body)
-  {
-    constexpr int N = camp::size<POLICY_LIST>::value;
-    static_assert(N > 0, "RAJA policy list must not be empty");
+};
 
-    if(pol > N-1)  {
-      RAJA_ABORT_OR_THROW("Policy enum not supported");
+template<typename POLICY_LIST>
+struct dynamic_helper<0, POLICY_LIST>
+{
+  template<typename SEGMENT, typename... PARAMS>
+  static void
+  invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params)
+  {
+    if(0==pol){
+      using t_pol = typename camp::at<POLICY_LIST,camp::num<0>>::type;
+      RAJA::forall<t_pol>(seg, params...);
+      return;
     }
-    dynamic_helper<N-1, POLICY_LIST>::invoke_forall(pol, seg, body);
+    RAJA_ABORT_OR_THROW("Policy enum not supported ");
   }
 
-  template<typename POLICY_LIST, typename SEGMENT, typename BODY>
-  resources::EventProxy<resources::Resource>
-  dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body)
+  template<typename SEGMENT, typename... PARAMS>
+  static resources::EventProxy<resources::Resource>
+  invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params)
   {
-    constexpr int N = camp::size<POLICY_LIST>::value;
-    static_assert(N > 0, "RAJA policy list must not be empty");
+    if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range ");
 
-    if(pol > N-1)  {
-      RAJA_ABORT_OR_THROW("Policy value out of range");
-    }
+    using t_pol = typename camp::at<POLICY_LIST,camp::num<0>>::type;
+    using resource_type = typename resources::get_resource<t_pol>::type;
+
+    RAJA::forall<t_pol>(r.get<resource_type>(), seg, params...);
+
+    //Return a generic event proxy from r,
+    //because forall returns a typed event proxy
+    return {r};
+  }
+
+};
 
-    return dynamic_helper<N-1, POLICY_LIST>::invoke_forall(r, pol, seg, body);
+template<typename POLICY_LIST, typename SEGMENT, typename... PARAMS>
+void dynamic_forall(const int pol, SEGMENT const &seg, PARAMS&&... params)
+{
+  constexpr int N = camp::size<POLICY_LIST>::value;
+  static_assert(N > 0, "RAJA policy list must not be empty");
+
+  if(pol > N-1)  {
+    RAJA_ABORT_OR_THROW("Policy enum not supported");
+  }
+  dynamic_helper<N-1, POLICY_LIST>::invoke_forall(pol, seg, params...);
+}
+
+template<typename POLICY_LIST, typename SEGMENT, typename... PARAMS>
+resources::EventProxy<resources::Resource>
+dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params)
+{
+  constexpr int N = camp::size<POLICY_LIST>::value;
+  static_assert(N > 0, "RAJA policy list must not be empty");
+
+  if(pol > N-1)  {
+    RAJA_ABORT_OR_THROW("Policy value out of range");
   }
 
-}  // namespace expt
+  return dynamic_helper<N-1, POLICY_LIST>::invoke_forall(r, pol, seg, params...);
+}
 
 
 }  // namespace RAJA
diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp
index f1d70aeacb..7ea7ce57ef 100644
--- a/include/RAJA/pattern/launch/launch_core.hpp
+++ b/include/RAJA/pattern/launch/launch_core.hpp
@@ -162,7 +162,7 @@ class LaunchContext
   void *shared_mem_ptr;
 
 #if defined(RAJA_ENABLE_SYCL)
-  mutable cl::sycl::nd_item<3> *itm;
+  mutable ::sycl::nd_item<3> *itm;
 #endif
 
   RAJA_HOST_DEVICE LaunchContext()
diff --git a/include/RAJA/policy/hip/policy.hpp b/include/RAJA/policy/hip/policy.hpp
index a9f9027675..040de50f31 100644
--- a/include/RAJA/policy/hip/policy.hpp
+++ b/include/RAJA/policy/hip/policy.hpp
@@ -324,8 +324,9 @@ struct DeviceConstants
 // values for HIP warp size and max block size.
 //
 #if defined(__HIP_PLATFORM_AMD__)
-constexpr DeviceConstants device_constants(64, 1024, 64); // MI300A
-// constexpr DeviceConstants device_constants(64, 1024, 128); // MI250X
+constexpr DeviceConstants device_constants(RAJA_HIP_WAVESIZE, 1024, 64); // MI300A
+// constexpr DeviceConstants device_constants(RAJA_HIP_WAVESIZE, 1024, 128); // MI250X
+
 #elif defined(__HIP_PLATFORM_NVIDIA__)
 constexpr DeviceConstants device_constants(32, 1024, 32); // V100
 #endif
diff --git a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp
index 081a88dc23..1a8c9930dd 100644
--- a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp
+++ b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp
@@ -50,7 +50,7 @@ namespace detail
 struct syclInfo {
   sycl_dim_t gridDim{0};
   sycl_dim_t blockDim{0};
-  cl::sycl::queue qu = cl::sycl::queue();
+  ::sycl::queue qu = ::sycl::queue();
   bool setup_reducers = false;
 #if defined(RAJA_ENABLE_OPENMP)
   syclInfo* thread_states = nullptr;
@@ -62,7 +62,7 @@ extern syclInfo g_status;
 
 extern syclInfo tl_status;
 
-extern std::unordered_map<cl::sycl::queue, bool> g_queue_info_map;
+extern std::unordered_map<::sycl::queue, bool> g_queue_info_map;
 
 }  // namespace detail
 
diff --git a/include/RAJA/policy/sycl/forall.hpp b/include/RAJA/policy/sycl/forall.hpp
index 901cc694f0..1c6876e328 100644
--- a/include/RAJA/policy/sycl/forall.hpp
+++ b/include/RAJA/policy/sycl/forall.hpp
@@ -206,8 +206,8 @@ resources::EventProxy<resources::Sycl> forall_impl(resources::Sycl &sycl_res,
     }).wait(); // Need to wait for completion to free memory
 
     // Free our device memory
-    cl::sycl::free(lbody, *q);
-    cl::sycl::free(beg, *q);
+    ::sycl::free(lbody, *q);
+    ::sycl::free(beg, *q);
 
     RAJA_FT_END;
   }
diff --git a/include/RAJA/policy/sycl/kernel/Conditional.hpp b/include/RAJA/policy/sycl/kernel/Conditional.hpp
index 9149418518..e2e6b09e6d 100644
--- a/include/RAJA/policy/sycl/kernel/Conditional.hpp
+++ b/include/RAJA/policy/sycl/kernel/Conditional.hpp
@@ -51,7 +51,7 @@ struct SyclStatementExecutor<Data,
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     if (Conditional::eval(data)) {
 
diff --git a/include/RAJA/policy/sycl/kernel/For.hpp b/include/RAJA/policy/sycl/kernel/For.hpp
index d0976b931f..2019bfa7a9 100644
--- a/include/RAJA/policy/sycl/kernel/For.hpp
+++ b/include/RAJA/policy/sycl/kernel/For.hpp
@@ -59,7 +59,7 @@ struct SyclStatementExecutor<
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     auto len = segment_length<ArgumentId>(data);
     auto i = item.get_global_id(Dim);
@@ -124,7 +124,7 @@ struct SyclStatementExecutor<
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     auto len = segment_length<ArgumentId>(data);
     auto i = item.get_group(Dim);
@@ -187,7 +187,7 @@ struct SyclStatementExecutor<
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     auto len = segment_length<ArgumentId>(data);
     auto i0 = item.get_group(Dim);
@@ -253,7 +253,7 @@ struct SyclStatementExecutor<
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     auto len = segment_length<ArgumentId>(data);
     auto i = item.get_local_id(Dim);
@@ -317,7 +317,7 @@ struct SyclStatementExecutor<
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     auto len = segment_length<ArgumentId>(data);
     auto i0 = item.get_local_id(Dim);
@@ -393,7 +393,7 @@ struct SyclStatementExecutor<
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item)
   {
     auto len = segment_length<ArgumentId>(data);
     auto i = item.get_global_id(0);
@@ -454,7 +454,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
 
     using idx_type = camp::decay<decltype(camp::get<ArgumentId>(data.offset_tuple))>;
diff --git a/include/RAJA/policy/sycl/kernel/ForICount.hpp b/include/RAJA/policy/sycl/kernel/ForICount.hpp
index 9c25bb0ab9..fcd3b1824d 100644
--- a/include/RAJA/policy/sycl/kernel/ForICount.hpp
+++ b/include/RAJA/policy/sycl/kernel/ForICount.hpp
@@ -63,7 +63,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     diff_t len = segment_length<ArgumentId>(data);
     auto i = item.get_local_id(ThreadDim);
@@ -121,7 +121,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     diff_t len = segment_length<ArgumentId>(data);
     auto i0 = item.get_local_id(0);
@@ -181,7 +181,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // masked size strided loop
     diff_t len = segment_length<ArgumentId>(data);
@@ -243,7 +243,7 @@ struct SyclStatementExecutor<
   using typename Base::diff_t;
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // block stride loop
     diff_t len = segment_length<ArgumentId>(data);
@@ -300,7 +300,7 @@ struct SyclStatementExecutor<
   using typename Base::diff_t;
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // grid stride loop
     diff_t len = segment_length<ArgumentId>(data);
@@ -349,7 +349,7 @@ struct SyclStatementExecutor<
   using typename Base::diff_t;
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // grid stride loop
     diff_t len = segment_length<ArgumentId>(data);
@@ -399,7 +399,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     diff_t len = segment_length<ArgumentId>(data);
 
diff --git a/include/RAJA/policy/sycl/kernel/Lambda.hpp b/include/RAJA/policy/sycl/kernel/Lambda.hpp
index 0542f4b81e..05f4fb3a44 100644
--- a/include/RAJA/policy/sycl/kernel/Lambda.hpp
+++ b/include/RAJA/policy/sycl/kernel/Lambda.hpp
@@ -46,7 +46,7 @@ template <typename Data, camp::idx_t LambdaIndex, typename... Args, typename Typ
 struct SyclStatementExecutor<Data, statement::Lambda<LambdaIndex, Args...>, Types> {
 
   static
-  inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Only execute the lambda if it hasn't been masked off
     if(thread_active){
diff --git a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp
index 88c789c062..f339bccef5 100644
--- a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp
+++ b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp
@@ -93,7 +93,7 @@ namespace internal
  * SYCL global function for launching SyclKernel policies.
  */
 template <typename Data, typename Exec>
-void SyclKernelLauncher(Data data, cl::sycl::nd_item<3> item)
+void SyclKernelLauncher(Data data, ::sycl::nd_item<3> item)
 {
 
   using data_t = camp::decay<Data>;
@@ -128,7 +128,7 @@ struct SyclLaunchHelper<false,sycl_launch<async0>,StmtList,Data,Types>
   static void launch(Data &&data,
                      internal::LaunchDims launch_dims,
                      size_t shmem,
-                     cl::sycl::queue* qu)
+                     ::sycl::queue* qu)
   {
 
     //
@@ -136,20 +136,20 @@ struct SyclLaunchHelper<false,sycl_launch<async0>,StmtList,Data,Types>
     // Kernel body is nontrivially copyable, create space on device and copy to
     // Workaround until "is_device_copyable" is supported
     //
-    data_t* m_data = (data_t*) cl::sycl::malloc_device(sizeof(data_t), *qu);
+    data_t* m_data = (data_t*) ::sycl::malloc_device(sizeof(data_t), *qu);
     qu->memcpy(m_data, &data, sizeof(data_t)).wait();
 
-    qu->submit([&](cl::sycl::handler& h) {
+    qu->submit([&](::sycl::handler& h) {
  
       h.parallel_for(launch_dims.fit_nd_range(qu),
-                     [=] (cl::sycl::nd_item<3> item) {
+                     [=] (::sycl::nd_item<3> item) {
         
         SyclKernelLauncher<Data, executor_t>(*m_data, item);
 
       });
     }).wait(); // Need to wait to free memory
 
-    cl::sycl::free(m_data, *qu);
+    ::sycl::free(m_data, *qu);
 
   }
 };
@@ -172,13 +172,13 @@ struct SyclLaunchHelper<true,sycl_launch<async0>,StmtList,Data,Types>
   static void launch(Data &&data,
                      internal::LaunchDims launch_dims,
                      size_t shmem,
-                     cl::sycl::queue* qu)
+                     ::sycl::queue* qu)
   {
 
-    qu->submit([&](cl::sycl::handler& h) {
+    qu->submit([&](::sycl::handler& h) {
  
       h.parallel_for(launch_dims.fit_nd_range(qu),
-                     [=] (cl::sycl::nd_item<3> item) {
+                     [=] (::sycl::nd_item<3> item) {
 
         SyclKernelLauncher<Data, executor_t>(data, item);
 
diff --git a/include/RAJA/policy/sycl/kernel/Tile.hpp b/include/RAJA/policy/sycl/kernel/Tile.hpp
index 81a57cdecb..59590b2556 100644
--- a/include/RAJA/policy/sycl/kernel/Tile.hpp
+++ b/include/RAJA/policy/sycl/kernel/Tile.hpp
@@ -61,7 +61,7 @@ struct SyclStatementExecutor<
   using enclosed_stmts_t = SyclStatementListExecutor<Data, stmt_list_t, Types>;
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
-  static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active){
+  static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active){
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
 
@@ -139,7 +139,7 @@ struct SyclStatementExecutor<
 
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
-  static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
@@ -231,7 +231,7 @@ struct SyclStatementExecutor<
 
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
-  static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
@@ -321,7 +321,7 @@ struct SyclStatementExecutor<
 
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
-  static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
@@ -409,7 +409,7 @@ struct SyclStatementExecutor<
 
   using diff_t = segment_diff_type<ArgumentId, Data>;
 
-  static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
diff --git a/include/RAJA/policy/sycl/kernel/TileTCount.hpp b/include/RAJA/policy/sycl/kernel/TileTCount.hpp
index b1d263a263..f27dafca80 100644
--- a/include/RAJA/policy/sycl/kernel/TileTCount.hpp
+++ b/include/RAJA/policy/sycl/kernel/TileTCount.hpp
@@ -70,7 +70,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active){
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active){
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
 
@@ -141,7 +141,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
@@ -214,7 +214,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
@@ -289,7 +289,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
@@ -363,7 +363,7 @@ struct SyclStatementExecutor<
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Get the segment referenced by this Tile statement
     auto &segment = camp::get<ArgumentId>(data.segment_tuple);
diff --git a/include/RAJA/policy/sycl/kernel/internal.hpp b/include/RAJA/policy/sycl/kernel/internal.hpp
index 56e3a9aa1e..3498550c25 100644
--- a/include/RAJA/policy/sycl/kernel/internal.hpp
+++ b/include/RAJA/policy/sycl/kernel/internal.hpp
@@ -86,7 +86,7 @@ struct LaunchDims {
     return result;
   }
 
-  cl::sycl::nd_range<3> fit_nd_range(::sycl::queue* q) {
+  ::sycl::nd_range<3> fit_nd_range(::sycl::queue* q) {
 
     sycl_dim_3_t launch_global;
 
@@ -95,9 +95,9 @@ struct LaunchDims {
     launch_local.y = std::max(launch_local.y, local.y);
     launch_local.z = std::max(launch_local.z, local.z);
 
-    cl::sycl::device dev = q->get_device();
+    ::sycl::device dev = q->get_device();
 
-    auto max_work_group_size = dev.get_info< ::cl::sycl::info::device::max_work_group_size>();
+    auto max_work_group_size = dev.get_info< ::sycl::info::device::max_work_group_size>();
 
     if(launch_local.x > max_work_group_size) {
       launch_local.x = max_work_group_size;
@@ -160,10 +160,10 @@ struct LaunchDims {
       launch_global.z = ((launch_global.z / launch_local.z) + 1) * launch_local.z; 
     }
 
-    cl::sycl::range<3> ret_th = {launch_local.x, launch_local.y, launch_local.z};
-    cl::sycl::range<3> ret_gl = {launch_global.x, launch_global.y, launch_global.z};
+    ::sycl::range<3> ret_th = {launch_local.x, launch_local.y, launch_local.z};
+    ::sycl::range<3> ret_gl = {launch_global.x, launch_global.y, launch_global.z};
 
-    return cl::sycl::nd_range<3>(ret_gl, ret_th);
+    return ::sycl::nd_range<3>(ret_gl, ret_th);
   }
 };
 
@@ -176,7 +176,7 @@ struct SyclStatementListExecutorHelper {
   using cur_stmt_t = camp::at_v<StmtList, cur_stmt>;
 
   template <typename Data>
-  inline static RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  inline static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Execute stmt
     cur_stmt_t::exec(data, item, thread_active);
@@ -203,7 +203,7 @@ template <camp::idx_t num_stmts, typename StmtList>
 struct SyclStatementListExecutorHelper<num_stmts, num_stmts, StmtList> {
 
   template <typename Data>
-  inline static RAJA_DEVICE void exec(Data &, cl::sycl::nd_item<3> item, bool)
+  inline static RAJA_DEVICE void exec(Data &, ::sycl::nd_item<3> item, bool)
   {
     // nop terminator
   }
@@ -233,7 +233,7 @@ struct SyclStatementListExecutor<Data, StatementList<Stmts...>, Types> {
   static
   inline
   RAJA_DEVICE
-  void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
+  void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
   {
     // Execute statements in order with helper class
     SyclStatementListExecutorHelper<0, num_stmts, enclosed_stmts_t>::exec(data, item, thread_active);
diff --git a/include/RAJA/policy/sycl/launch.hpp b/include/RAJA/policy/sycl/launch.hpp
index ad9fecc222..c8bc7aab53 100644
--- a/include/RAJA/policy/sycl/launch.hpp
+++ b/include/RAJA/policy/sycl/launch.hpp
@@ -63,13 +63,13 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
 
       RAJA_FT_BEGIN;
 
-      q->submit([&](cl::sycl::handler& h) {
+      q->submit([&](::sycl::handler& h) {
 
         auto s_vec = ::sycl::local_accessor<char, 1> (params.shared_mem_size, h);
 
         h.parallel_for
-          (cl::sycl::nd_range<3>(gridSize, blockSize),
-           [=] (cl::sycl::nd_item<3> itm) {
+          (::sycl::nd_range<3>(gridSize, blockSize),
+           [=] (::sycl::nd_item<3> itm) {
 
             LaunchContext ctx;
             ctx.itm = &itm;
@@ -136,14 +136,14 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
       RAJA::expt::ParamMultiplexer::init<EXEC_POL>(*res);
       auto reduction = ::sycl::reduction(res, launch_reducers, combiner);
 
-      q->submit([&](cl::sycl::handler& h) {
+      q->submit([&](::sycl::handler& h) {
 
        auto s_vec = ::sycl::local_accessor<char, 1> (launch_params.shared_mem_size, h);
 
         h.parallel_for
-          (cl::sycl::nd_range<3>(gridSize, blockSize),
+          (::sycl::nd_range<3>(gridSize, blockSize),
            reduction,
-           [=] (cl::sycl::nd_item<3> itm, auto & red) {
+           [=] (::sycl::nd_item<3> itm, auto & red) {
 
             LaunchContext ctx;
             ctx.itm = &itm;
@@ -211,16 +211,16 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
       //
       using LOOP_BODY = camp::decay<BODY_IN>;
       LOOP_BODY* lbody;
-      lbody = (LOOP_BODY*) cl::sycl::malloc_device(sizeof(LOOP_BODY), *q);
+      lbody = (LOOP_BODY*) ::sycl::malloc_device(sizeof(LOOP_BODY), *q);
       q->memcpy(lbody, &body_in, sizeof(LOOP_BODY)).wait();
 
-      q->submit([&](cl::sycl::handler& h) {
+      q->submit([&](::sycl::handler& h) {
 
         auto s_vec = ::sycl::local_accessor<char, 1> (params.shared_mem_size, h);
 
         h.parallel_for
-          (cl::sycl::nd_range<3>(gridSize, blockSize),
-           [=] (cl::sycl::nd_item<3> itm) {
+          (::sycl::nd_range<3>(gridSize, blockSize),
+           [=] (::sycl::nd_item<3> itm) {
 
             LaunchContext ctx;
             ctx.itm = &itm;
@@ -234,7 +234,7 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
 
       }).wait(); // Need to wait for completion to free memory
 
-      cl::sycl::free(lbody, *q);
+      ::sycl::free(lbody, *q);
 
       RAJA_FT_END;
 
@@ -290,21 +290,21 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
       //
       using LOOP_BODY = camp::decay<BODY_IN>;
       LOOP_BODY* lbody;
-      lbody = (LOOP_BODY*) cl::sycl::malloc_device(sizeof(LOOP_BODY), *q);
+      lbody = (LOOP_BODY*) ::sycl::malloc_device(sizeof(LOOP_BODY), *q);
       q->memcpy(lbody, &body_in, sizeof(LOOP_BODY)).wait();
 
       ReduceParams* res = ::sycl::malloc_shared<ReduceParams>(1,*q);
       RAJA::expt::ParamMultiplexer::init<EXEC_POL>(*res);
       auto reduction = ::sycl::reduction(res, launch_reducers, combiner);
 
-      q->submit([&](cl::sycl::handler& h) {
+      q->submit([&](::sycl::handler& h) {
 
        auto s_vec = ::sycl::local_accessor<char, 1> (launch_params.shared_mem_size, h);
 
         h.parallel_for
-          (cl::sycl::nd_range<3>(gridSize, blockSize),
+          (::sycl::nd_range<3>(gridSize, blockSize),
            reduction,
-           [=] (cl::sycl::nd_item<3> itm, auto & red) {
+           [=] (::sycl::nd_item<3> itm, auto & red) {
 
             LaunchContext ctx;
             ctx.itm = &itm;
@@ -325,7 +325,7 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
 
       RAJA::expt::ParamMultiplexer::combine<EXEC_POL>( launch_reducers, *res );
       ::sycl::free(res, *q);
-      cl::sycl::free(lbody, *q);
+      ::sycl::free(lbody, *q);
 
       RAJA_FT_END;
     }
diff --git a/include/RAJA/policy/sycl/policy.hpp b/include/RAJA/policy/sycl/policy.hpp
index 0f92fe27e1..a02c9ea30f 100644
--- a/include/RAJA/policy/sycl/policy.hpp
+++ b/include/RAJA/policy/sycl/policy.hpp
@@ -39,7 +39,7 @@ struct uint3 {
   unsigned long x, y, z;
 };
 
-using sycl_dim_t = cl::sycl::range<1>;
+using sycl_dim_t = ::sycl::range<1>;
 
 using sycl_dim_3_t = uint3;
 
diff --git a/include/RAJA/policy/sycl/reduce.hpp b/include/RAJA/policy/sycl/reduce.hpp
index 49d89b3cd2..8a84d5b412 100644
--- a/include/RAJA/policy/sycl/reduce.hpp
+++ b/include/RAJA/policy/sycl/reduce.hpp
@@ -107,11 +107,11 @@ struct Reduce_Data
   Reduce_Data(T initValue, T identityValue, Offload_Info &info)
       : value(initValue)
   {
-    cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue();
+    ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue();
 
 
-    device = reinterpret_cast<T *>(cl::sycl::malloc_device(sycl::MaxNumTeams * sizeof(T), *(q)));
-    host = reinterpret_cast<T *>(cl::sycl::malloc_host(sycl::MaxNumTeams * sizeof(T), *(q)));
+    device = reinterpret_cast<T *>(::sycl::malloc_device(sycl::MaxNumTeams * sizeof(T), *(q)));
+    host = reinterpret_cast<T *>(::sycl::malloc_host(sycl::MaxNumTeams * sizeof(T), *(q)));
 
     if (!host) {
       printf("Unable to allocate space on host\n");
@@ -136,7 +136,7 @@ struct Reduce_Data
   //! transfers from the host to the device -- exit() is called upon failure
   RAJA_INLINE void hostToDevice(Offload_Info &info)
   {
-    cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue();
+    ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue();
 
     if(!q) {
       camp::resources::Resource res = camp::resources::Sycl();
@@ -154,7 +154,7 @@ struct Reduce_Data
   //! transfers from the device to the host -- exit() is called upon failure
   RAJA_INLINE void deviceToHost(Offload_Info &info)
   {
-    cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue();
+    ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue();
 
     if(!q) {
       camp::resources::Resource res = camp::resources::Sycl();
@@ -172,14 +172,14 @@ struct Reduce_Data
   //! frees all data from the offload information passed
   RAJA_INLINE void cleanup(Offload_Info &info)
   {
-    cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue();
+    ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue();
 
     if (device) {
-      cl::sycl::free(reinterpret_cast<void *>(device), *q);
+      ::sycl::free(reinterpret_cast<void *>(device), *q);
       device = nullptr;
     }
     if (host) {
-      cl::sycl::free(reinterpret_cast<void *>(host), *q);
+      ::sycl::free(reinterpret_cast<void *>(host), *q);
       //delete[] host;
       host = nullptr;
     }
@@ -243,8 +243,8 @@ struct TargetReduce
   TargetReduce &reduce(T rhsVal)
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(val.device[i]);
+    auto i = 0; //__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(val.device[i]);
     Reducer{}(atm, rhsVal);
     return *this;
 #else
@@ -257,8 +257,8 @@ struct TargetReduce
   const TargetReduce &reduce(T rhsVal) const
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(val.device[i]);
+    auto i = 0; //__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(val.device[i]);
     Reducer{}(atm, rhsVal);  
     return *this;
 #else
@@ -356,10 +356,10 @@ struct TargetReduceLoc
   TargetReduceLoc &reduce(T rhsVal, IndexType rhsLoc)
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    cl::sycl::atomic_fence(cl::sycl::memory_order_acquire, cl::sycl::memory_scope::device);
+    auto i = 0; //__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    ::sycl::atomic_fence(::sycl::memory_order_acquire, ::sycl::memory_scope::device);
     Reducer{}(val.device[i], loc.device[i], rhsVal, rhsLoc);
-    cl::sycl::atomic_fence(cl::sycl::memory_order_release, cl::sycl::memory_scope::device);
+    ::sycl::atomic_fence(::sycl::memory_order_release, ::sycl::memory_scope::device);
     return *this;
 #else
     Reducer{}(val.value, loc.value, rhsVal, rhsLoc);
@@ -415,8 +415,8 @@ class ReduceSum<sycl_reduce, T>
   const self &operator+=(T rhsVal) const
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(parent::val.device[i]);
+    auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(parent::val.device[i]);
     atm.fetch_add(rhsVal);
     return *this;
 #else
@@ -441,8 +441,8 @@ class ReduceBitOr<sycl_reduce, T>
   self &operator|=(T rhsVal)
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(parent::val.device[i]);
+    auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(parent::val.device[i]);
     atm |= rhsVal;
     return *this;
 #else
@@ -455,8 +455,8 @@ class ReduceBitOr<sycl_reduce, T>
   const self &operator|=(T rhsVal) const
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(parent::val.device[i]);
+    auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(parent::val.device[i]);
     atm |= rhsVal;
     return *this;
 #else
@@ -481,8 +481,8 @@ class ReduceBitAnd<sycl_reduce, T>
   self &operator&=(T rhsVal)
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(parent::val.device[i]);
+    auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(parent::val.device[i]);
     atm &= rhsVal;
     return *this;
 #else
@@ -495,8 +495,8 @@ class ReduceBitAnd<sycl_reduce, T>
   const self &operator&=(T rhsVal) const
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(parent::val.device[i]);
+    auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(parent::val.device[i]);
     atm &= rhsVal;
     return *this;
 #else
@@ -522,8 +522,8 @@ class ReduceMin<sycl_reduce, T>
   self &min(T rhsVal)
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(parent::val.device[i]);
+    auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(parent::val.device[i]);
     atm.fetch_min(rhsVal);
     return *this;
 #else
@@ -536,8 +536,8 @@ class ReduceMin<sycl_reduce, T>
   const self &min(T rhsVal) const
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(parent::val.device[i]);
+    auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(parent::val.device[i]);
     atm.fetch_min(rhsVal);
     return *this;
 #else
@@ -563,8 +563,8 @@ class ReduceMax<sycl_reduce, T>
   self &max(T rhsVal)
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(parent::val.device[i]);
+    auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(parent::val.device[i]);
     atm.fetch_max(rhsVal);
     return *this;
 #else
@@ -577,8 +577,8 @@ class ReduceMax<sycl_reduce, T>
   const self &max(T rhsVal) const
   {
 #ifdef __SYCL_DEVICE_ONLY__
-    auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0];
-    auto atm = ::sycl::atomic_ref<T, cl::sycl::memory_order_acq_rel, cl::sycl::memory_scope::device, cl::sycl::access::address_space::global_space>(parent::val.device[i]);
+    auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0];
+    auto atm = ::sycl::atomic_ref<T, ::sycl::memory_order_acq_rel, ::sycl::memory_scope::device, ::sycl::access::address_space::global_space>(parent::val.device[i]);
     atm.fetch_max(rhsVal);
     return *this;
 #else
diff --git a/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp b/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp
index 74bbc2f077..f1810807f9 100644
--- a/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp
+++ b/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp
@@ -57,7 +57,7 @@ namespace expt
 
 		public:
 
-      static constexpr int s_num_elem = 64;
+      static constexpr int s_num_elem = policy::hip::device_constants.WARP_SIZE;
 
       /*!
        * @brief Default constructor, zeros register contents
@@ -780,8 +780,8 @@ namespace expt
 
         // Third: mask off everything but output_segment
         //        this is because all output segments are valid at this point
-        // (5-segbits), the 5 is since the warp-width is 32 == 1<<5
-        int our_output_segment = get_lane()>>(6-segbits);
+        static constexpr int log2_warp_size = RAJA::log2(RAJA::policy::hip::device_constants.WARP_SIZE);
+        int our_output_segment = get_lane()>>(log2_warp_size-segbits);
         bool in_output_segment = our_output_segment == output_segment;
         if(!in_output_segment){
           result.get_raw_value() = 0;
@@ -828,8 +828,9 @@ namespace expt
 
         // First: tree reduce values within each segment
         element_type x = m_value;
+        static constexpr int log2_warp_size = RAJA::log2(RAJA::policy::hip::device_constants.WARP_SIZE);
         RAJA_UNROLL
-        for(int i = 0;i < 6-segbits; ++ i){
+        for(int i = 0;i < log2_warp_size-segbits; ++ i){
 
           // tree shuffle
           int delta = s_num_elem >> (i+1);
diff --git a/include/RAJA/policy/tensor/arch/hip/traits.hpp b/include/RAJA/policy/tensor/arch/hip/traits.hpp
index 4c4d959599..1b8a9679bb 100644
--- a/include/RAJA/policy/tensor/arch/hip/traits.hpp
+++ b/include/RAJA/policy/tensor/arch/hip/traits.hpp
@@ -29,7 +29,8 @@ namespace expt {
   struct RegisterTraits<RAJA::expt::hip_wave_register, T>{
       using element_type = T;
       using register_policy = RAJA::expt::hip_wave_register;
-      static constexpr camp::idx_t s_num_elem = 64;
+      static constexpr camp::idx_t s_num_elem = policy::hip::device_constants.WARP_SIZE;
+
       static constexpr camp::idx_t s_num_bits = sizeof(T) * s_num_elem;
       using int_element_type = int32_t;
   };
diff --git a/scripts/lc-builds/corona_sycl.sh b/scripts/lc-builds/corona_sycl.sh
index 815928e434..6be479a535 100755
--- a/scripts/lc-builds/corona_sycl.sh
+++ b/scripts/lc-builds/corona_sycl.sh
@@ -13,7 +13,7 @@ if [[ $# -lt 1 ]]; then
   echo "   1) SYCL compiler installation path"
   echo
   echo "For example: "
-  echo "    corona_sycl.sh /usr/workspace/raja-dev/clang_sycl_2f03ef85fee5_hip_gcc10.3.1_rocm5.7.1"
+  echo "    corona_sycl.sh /usr/workspace/raja-dev/clang_sycl_730cd3a5275f_hip_gcc10.3.1_rocm6.0.2"
   exit
 fi
 
diff --git a/src/MemUtils_SYCL.cpp b/src/MemUtils_SYCL.cpp
index 0b5f1b8be6..3a3976c675 100644
--- a/src/MemUtils_SYCL.cpp
+++ b/src/MemUtils_SYCL.cpp
@@ -49,8 +49,8 @@ syclInfo tl_status;
 #endif
 
 //! State of raja sycl queue synchronization for sycl reducer objects
-std::unordered_map<cl::sycl::queue, bool> g_queue_info_map{
-    {cl::sycl::queue(), true}};
+std::unordered_map<::sycl::queue, bool> g_queue_info_map{
+    {::sycl::queue(), true}};
 
 }  // namespace detail
 
diff --git a/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp b/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp
index 8c8d051d8f..dde4273abe 100644
--- a/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp
+++ b/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp
@@ -34,7 +34,7 @@ void DynamicForallResourceRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last
 
   std::iota(test_array, test_array + RAJA::stripIndexType(N), rbegin);
 
-  RAJA::expt::dynamic_forall<POLICY_LIST>(working_res, pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) {
+  RAJA::dynamic_forall<POLICY_LIST>(working_res, pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) {
     working_array[RAJA::stripIndexType(idx - rbegin)] = idx;
   });
 
diff --git a/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp b/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp
index 11168b0e30..6f13f07cf5 100644
--- a/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp
+++ b/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp
@@ -40,7 +40,7 @@ void DynamicForallRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last, const
 
     std::iota(test_array, test_array + RAJA::stripIndexType(N), rbegin);
 
-    RAJA::expt::dynamic_forall<POLICY_LIST>(pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) {
+    RAJA::dynamic_forall<POLICY_LIST>(pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) {
       working_array[RAJA::stripIndexType(idx - rbegin)] = idx;
     });
 
@@ -50,7 +50,7 @@ void DynamicForallRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last, const
 
     working_res.memcpy(working_array, test_array, sizeof(INDEX_TYPE) * data_len);
 
-    RAJA::expt::dynamic_forall<POLICY_LIST>(pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) {
+    RAJA::dynamic_forall<POLICY_LIST>(pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) {
       (void) idx;
       working_array[0]++;
     });
diff --git a/test/include/RAJA_test-tensor.hpp b/test/include/RAJA_test-tensor.hpp
index cf633098a9..d836e1463f 100644
--- a/test/include/RAJA_test-tensor.hpp
+++ b/test/include/RAJA_test-tensor.hpp
@@ -87,7 +87,9 @@ struct TensorTestHelper<RAJA::expt::hip_wave_register>
     void exec(BODY const &body){
       hipDeviceSynchronize();
 
-      RAJA::forall<RAJA::hip_exec<64>>(RAJA::RangeSegment(0,64),
+      static constexpr int warp_size = RAJA::policy::hip::device_constants.WARP_SIZE;
+
+      RAJA::forall<RAJA::hip_exec<warp_size>>(RAJA::RangeSegment(0,warp_size),
       [=] RAJA_HOST_DEVICE (int ){
         body();
       });
diff --git a/tpl/camp b/tpl/camp
index 0f07de4240..7866dc34a4 160000
--- a/tpl/camp
+++ b/tpl/camp
@@ -1 +1 @@
-Subproject commit 0f07de4240c42e0b38a8d872a20440cb4b33d9f5
+Subproject commit 7866dc34a4f86b6c58891f6daf9f6da0f89b3d94