HCC
HCC is a single-source, C/C++ compiler for heterogeneous computing. It's optimized with HSA (http://www.hsafoundation.com/).
kalmar_launch.h
1 //===----------------------------------------------------------------------===//
2 //
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
5 //
6 //===----------------------------------------------------------------------===//
7 
8 #pragma once
9 
10 #include "hc_defines.h"
11 #include "kalmar_runtime.h"
12 #include "kalmar_serialize.h"
13 
15 namespace Kalmar {
16 
17 template <typename Kernel>
18 static void append_kernel(const std::shared_ptr<KalmarQueue>& pQueue, const Kernel& f, void* kernel)
19 {
20  Kalmar::BufferArgumentsAppender vis(pQueue, kernel);
21  Kalmar::Serialize s(&vis);
22  f.__cxxamp_serialize(s);
23 }
24 
25 template <typename Kernel>
26 static inline std::shared_ptr<KalmarQueue> get_availabe_que(const Kernel& f)
27 {
28  Kalmar::QueueSearcher ser;
29  Kalmar::Serialize s(&ser);
30  f.__cxxamp_serialize(s);
31  if (ser.get_que())
32  return ser.get_que();
33  else
34  return getContext()->auto_select();
35 }
36 
37 #pragma clang diagnostic push
38 #pragma clang diagnostic ignored "-Wunused-variable"
39 template<typename Kernel, int dim_ext>
40 inline std::shared_ptr<KalmarAsyncOp>
41 mcw_cxxamp_launch_kernel_async(const std::shared_ptr<KalmarQueue>& pQueue, size_t *ext,
42  size_t *local_size, const Kernel& f) restrict(cpu,amp) {
43 #if __KALMAR_ACCELERATOR__ != 1
44  //Invoke Kernel::__cxxamp_trampoline as an kernel
45  //to ensure functor has right operator() defined
46  //this triggers the trampoline code being emitted
47  // FIXME: implicitly casting to avoid pointer to int error
48  int* foo = reinterpret_cast<int*>(&Kernel::__cxxamp_trampoline);
49  void *kernel = NULL;
50  {
51  std::string kernel_name(f.__cxxamp_trampoline_name());
52  kernel = CLAMP::CreateKernel(kernel_name, pQueue.get());
53  }
54  append_kernel(pQueue, f, kernel);
55  return pQueue->LaunchKernelAsync(kernel, dim_ext, ext, local_size);
56 #endif
57 }
58 #pragma clang diagnostic pop
59 
60 #pragma clang diagnostic push
61 #pragma clang diagnostic ignored "-Wunused-variable"
62 template<typename Kernel, int dim_ext>
63 inline
64 void mcw_cxxamp_launch_kernel(const std::shared_ptr<KalmarQueue>& pQueue, size_t *ext,
65  size_t *local_size, const Kernel& f) restrict(cpu,amp) {
66 #if __KALMAR_ACCELERATOR__ != 1
67  //Invoke Kernel::__cxxamp_trampoline as an kernel
68  //to ensure functor has right operator() defined
69  //this triggers the trampoline code being emitted
70  // FIXME: implicitly casting to avoid pointer to int error
71  int* foo = reinterpret_cast<int*>(&Kernel::__cxxamp_trampoline);
72  void *kernel = NULL;
73  {
74  std::string kernel_name(f.__cxxamp_trampoline_name());
75  kernel = CLAMP::CreateKernel(kernel_name, pQueue.get());
76  }
77  append_kernel(pQueue, f, kernel);
78  pQueue->LaunchKernel(kernel, dim_ext, ext, local_size);
79 #endif // __KALMAR_ACCELERATOR__
80 }
81 #pragma clang diagnostic pop
82 
83 #pragma clang diagnostic push
84 #pragma clang diagnostic ignored "-Wunused-variable"
85 template<typename Kernel>
86 inline void* mcw_cxxamp_get_kernel(const std::shared_ptr<KalmarQueue>& pQueue, const Kernel& f) restrict(cpu,amp) {
87 #if __KALMAR_ACCELERATOR__ != 1
88  //Invoke Kernel::__cxxamp_trampoline as an kernel
89  //to ensure functor has right operator() defined
90  //this triggers the trampoline code being emitted
91  // FIXME: implicitly casting to avoid pointer to int error
92  int* foo = reinterpret_cast<int*>(&Kernel::__cxxamp_trampoline);
93  void *kernel = NULL;
94  std::string kernel_name (f.__cxxamp_trampoline_name());
95  kernel = CLAMP::CreateKernel(kernel_name, pQueue.get());
96  return kernel;
97 #else
98  return NULL;
99 #endif
100 }
101 #pragma clang diagnostic pop
102 
103 template<typename Kernel, int dim_ext>
104 inline
105 void mcw_cxxamp_execute_kernel_with_dynamic_group_memory(
106  const std::shared_ptr<KalmarQueue>& pQueue, size_t *ext, size_t *local_size,
107  const Kernel& f, void *kernel, size_t dynamic_group_memory_size) restrict(cpu,amp) {
108 #if __KALMAR_ACCELERATOR__ != 1
109  append_kernel(pQueue, f, kernel);
110  pQueue->LaunchKernelWithDynamicGroupMemory(kernel, dim_ext, ext, local_size, dynamic_group_memory_size);
111 #endif // __KALMAR_ACCELERATOR__
112 }
113 
114 template<typename Kernel, int dim_ext>
115 inline std::shared_ptr<KalmarAsyncOp>
116 mcw_cxxamp_execute_kernel_with_dynamic_group_memory_async(
117  const std::shared_ptr<KalmarQueue>& pQueue, size_t *ext, size_t *local_size,
118  const Kernel& f, void *kernel, size_t dynamic_group_memory_size) restrict(cpu,amp) {
119 #if __KALMAR_ACCELERATOR__ != 1
120  append_kernel(pQueue, f, kernel);
121  return pQueue->LaunchKernelWithDynamicGroupMemoryAsync(kernel, dim_ext, ext, local_size, dynamic_group_memory_size);
122 #endif // __KALMAR_ACCELERATOR__
123 }
124 
125 } // namespace Kalmar
namespace for internal classes of Kalmar compiler / runtime
Definition: hc.hpp:42