HCC
HCC is a single-source, C/C++ compiler for heterogeneous computing. It's optimized with HSA (http://www.hsafoundation.com/).
hc_printf.hpp
1 #pragma once
2 
3 #include <type_traits>
4 #include <cstdlib>
5 #include <cstdio>
6 #include <cassert>
7 #include <atomic>
8 #include <string>
9 #include <regex>
10 #include <iostream>
11 #include <algorithm>
12 
13 #include "hc_am_internal.hpp"
14 #include "hsa_atomic.h"
15 
16 // Accelerator-side printf is only enabled when
17 // HCC_ENABLE_ACCELERATOR_PRINTF is defined.
18 //
19 //#define HCC_ENABLE_ACCELERATOR_PRINTF (1)
20 
21 // Indicate whether hc::printf is supported
22 #define HC_FEATURE_PRINTF (1)
23 
24 // Enable extra debug messages
25 #define HC_PRINTF_DEBUG (0)
26 
27 namespace hc {
28 
30  unsigned int ui;
31  int i;
32  float f;
33  void* ptr;
34  const void* cptr;
35  double d;
36 
37  // Header offset members (union uses same memory)
38  // uia[0] - PrintfPacket buffer offset
39  // uia[1] - Printf String buffer offset
40  // al - Using a single atomic offset of 8B, update
41  // both uias of 4B using single atomic operation.
42  // ull - used to load offsets non-atomically, and
43  // required to update atomic_ullong. Non-atomic
44  // use of ull will also run faster.
45  std::atomic_ullong al;
46  unsigned int uia[2];
47  unsigned long long ull;
48 };
49 
// Tag stored in PrintfPacket::type.
// The first group names the header entries of a printf buffer; the second
// group identifies which payload member of a data packet is meaningful.
enum PrintfPacketDataType {
  // Header types
  PRINTF_BUFFER_SIZE        = 0,
  PRINTF_STRING_BUFFER      = 1,
  PRINTF_STRING_BUFFER_SIZE = 2,
  PRINTF_OFFSETS            = 3,
  PRINTF_HEADER_SIZE        = 4,
  PRINTF_MIN_SIZE           = 5,

  // Packet data types (numbered consecutively after PRINTF_MIN_SIZE)
  PRINTF_UNUSED,
  PRINTF_UNSIGNED_INT,
  PRINTF_SIGNED_INT,
  PRINTF_FLOAT,
  PRINTF_DOUBLE,
  PRINTF_VOID_PTR,
  PRINTF_CONST_VOID_PTR,
  PRINTF_CHAR_PTR,
  PRINTF_CONST_CHAR_PTR
};
70 
71 class PrintfPacket {
72 public:
73  void clear() [[hc,cpu]] { type = PRINTF_UNUSED; }
74  void set(unsigned int d) [[hc,cpu]] { type = PRINTF_UNSIGNED_INT; data.ui = d; }
75  void set(int d) [[hc,cpu]] { type = PRINTF_SIGNED_INT; data.i = d; }
76  void set(float d) [[hc,cpu]] { type = PRINTF_FLOAT; data.f = d; }
77  void set(double d) [[hc,cpu]] { type = PRINTF_DOUBLE; data.d = d; }
78  void set(void* d) [[hc,cpu]] { type = PRINTF_VOID_PTR; data.ptr = d; }
79  void set(const void* d) [[hc,cpu]] { type = PRINTF_CONST_VOID_PTR; data.cptr = d; }
80  void set(char* d) [[hc,cpu]] { type = PRINTF_CHAR_PTR; data.ptr = d; }
81  void set(const char* d) [[hc,cpu]] { type = PRINTF_CONST_CHAR_PTR; data.cptr = d; }
82  PrintfPacketDataType type;
83  PrintfPacketData data;
84 };
85 
86 // Global printf buffer
87 // The actual variable is currently defined in mcwamp_hsa.cpp
88 extern PrintfPacket* printf_buffer;
89 
// Status codes returned by the hc::printf family.
enum PrintfError {
  PRINTF_SUCCESS                = 0,  // request accepted
  PRINTF_BUFFER_OVERFLOW        = 1,  // packet buffer missing or too full
  PRINTF_STRING_BUFFER_OVERFLOW = 2,  // string buffer missing or too full
  PRINTF_UNKNOWN_ERROR          = 3   // packet type tag did not match the argument
};
96 
97 static inline PrintfPacket* createPrintfBuffer(const unsigned int numElements) {
98  PrintfPacket* printfBuffer = NULL;
99  if (numElements > PRINTF_MIN_SIZE) {
100  printfBuffer = hc::internal::am_alloc_host_coherent(sizeof(PrintfPacket) * numElements);
101 
102  // Initialize the Header elements of the Printf Buffer
103  printfBuffer[PRINTF_BUFFER_SIZE].type = PRINTF_BUFFER_SIZE;
104  printfBuffer[PRINTF_BUFFER_SIZE].data.ui = numElements;
105 
106  // Header includes a helper string buffer which holds all char* args
107  // PrintfPacket is 12 bytes, equivalent string buffer size used
108  printfBuffer[PRINTF_STRING_BUFFER].type = PRINTF_STRING_BUFFER;
109  printfBuffer[PRINTF_STRING_BUFFER].data.ptr = hc::internal::am_alloc_host_coherent(sizeof(char) * numElements * 12);
110  printfBuffer[PRINTF_STRING_BUFFER_SIZE].type = PRINTF_STRING_BUFFER_SIZE;
111  printfBuffer[PRINTF_STRING_BUFFER_SIZE].data.ui = numElements * 12;
112 
113  // Using one atomic offset to maintain order and atomicity
114  printfBuffer[PRINTF_OFFSETS].type = PRINTF_OFFSETS;
115  printfBuffer[PRINTF_OFFSETS].data.uia[0] = PRINTF_HEADER_SIZE;
116  printfBuffer[PRINTF_OFFSETS].data.uia[1] = 0;
117  }
118  return printfBuffer;
119 }
120 
121 static inline void deletePrintfBuffer(PrintfPacket*& buffer) {
122  if (buffer){
123  if (buffer[PRINTF_STRING_BUFFER].data.ptr)
124  hc::am_free(buffer[PRINTF_STRING_BUFFER].data.ptr);
125  hc::am_free(buffer);
126  }
127  buffer = NULL;
128 }
129 
// Number of characters in `str` before the terminating '\0'.
// `str` must point to a NUL-terminated string.
static inline unsigned int string_length(const char* str) [[hc,cpu]]{
  unsigned int count = 0;
  for (const char* cursor = str; *cursor != '\0'; ++cursor) {
    ++count;
  }
  return count;
}
136 
// Copy exactly `len` characters from `src` to `dest`, front to back.
// No terminator is appended; callers include it in `len` when needed.
static inline void copy_n(char* dest, const char* src, const unsigned int len) [[hc,cpu]] {
  unsigned int remaining = len;
  while (remaining != 0) {
    *dest++ = *src++;
    --remaining;
  }
}
142 
// Memory footprint of an argument inside the string buffer.
// This overload is selected for `char*` / `const char*`: it returns the
// string length plus one for the terminating '\0'.
template <typename T>
std::size_t mem_size_if_string(typename std::enable_if< std::is_same<T,const char*>::value
 || std::is_same<T,char*>::value, T>::type s) [[hc,cpu]] {
  const unsigned int bytes_with_terminator = string_length(s) + 1;
  return bytes_with_terminator;
}
149 
// Overload selected for every type other than `char*` / `const char*`:
// such arguments occupy no space in the string buffer.
template <typename T>
std::size_t mem_size_if_string(typename std::enable_if< !std::is_same<T,const char*>::value
 && !std::is_same<T,char*>::value, T>::type s) [[hc,cpu]] {
  (void)s;  // only the type matters here, not the value
  return static_cast<std::size_t>(0);
}
155 
156 // get the argument count
157 static inline void countArg(unsigned int& count_arg, unsigned int& count_char) [[hc,cpu]] {}
158 template <typename T>
159 static inline void countArg(unsigned int& count_arg, unsigned int& count_char, const T t) [[hc,cpu]] {
160  ++count_arg;
161  count_char += mem_size_if_string<T>(t);
162 }
163 template <typename T, typename... Rest>
164 static inline void countArg(unsigned int& count_arg, unsigned int& count_char, const T t, const Rest&... rest) [[hc,cpu]] {
165  ++count_arg;
166  count_char += mem_size_if_string<T>(t);
167  countArg(count_arg, count_char, rest...);
168 }
169 
170 template<typename T>
171 PrintfError process_str_batch(PrintfPacket* queue, int poffset, unsigned int& soffset
172 , typename std::enable_if< std::is_same<T,const char*>::value || std::is_same<T,char*>::value, T>::type string) [[hc,cpu]] {
173 
174  if (queue[poffset].type != PRINTF_CHAR_PTR && queue[poffset].type != PRINTF_CONST_CHAR_PTR)
175  return PRINTF_UNKNOWN_ERROR;
176 
177  unsigned int str_len = string_length(string);
178  unsigned int sb_offset = soffset;
179  char* string_buffer = (char*) queue[PRINTF_STRING_BUFFER].data.ptr;
180  if (!string_buffer || soffset + str_len + 1 > queue[PRINTF_STRING_BUFFER_SIZE].data.ui){
181  return PRINTF_STRING_BUFFER_OVERFLOW;
182  }
183  copy_n(&string_buffer[sb_offset], string, str_len + 1);
184  queue[poffset].set(&string_buffer[sb_offset]);
185  soffset += str_len + 1;
186  return PRINTF_SUCCESS;
187 }
188 
189 template<typename T>
190 PrintfError process_str_batch(PrintfPacket* queue, int poffset, unsigned int& soffset
191 , typename std::enable_if< !std::is_same<T,const char*>::value && !std::is_same<T,char*>::value, T>::type data) [[hc,cpu]] {
192 
193  if (queue[poffset].type == PRINTF_CHAR_PTR || queue[poffset].type == PRINTF_CONST_CHAR_PTR)
194  return PRINTF_UNKNOWN_ERROR;
195  else
196  return PRINTF_SUCCESS;
197 }
198 
199 template <typename T>
200 static inline PrintfError set_batch(PrintfPacket* queue, int poffset, unsigned int& soffset, const T t) [[hc,cpu]] {
201  PrintfError err = PRINTF_SUCCESS;
202  queue[poffset].set(t);
203  err = process_str_batch<T>(queue, poffset, soffset, t);
204  return err;
205 }
206 
207 template <typename T, typename... Rest>
208 static inline PrintfError set_batch(PrintfPacket* queue, int poffset, unsigned int& soffset, const T t, Rest... rest) [[hc,cpu]] {
209  PrintfError err = PRINTF_SUCCESS;
210  queue[poffset].set(t);
211 
212  if ((err = process_str_batch<T>(queue, poffset, soffset, t)) != PRINTF_SUCCESS)
213  return err;
214 
215  return set_batch(queue, poffset + 1, soffset, rest...);
216 }
217 
218 template <typename... All>
219 static inline PrintfError printf(PrintfPacket* queue, All... all) [[hc,cpu]] {
220  unsigned int count_arg = 0;
221  unsigned int count_char = 0;
222  countArg(count_arg, count_char, all...);
223 
224  PrintfError error = PRINTF_SUCCESS;
225  PrintfPacketData old_off, try_off;
226 
227  if (!queue || count_arg + 1 + queue[PRINTF_OFFSETS].data.uia[0] > queue[PRINTF_BUFFER_SIZE].data.ui) {
228  error = PRINTF_BUFFER_OVERFLOW;
229  }
230  else if (!queue[PRINTF_STRING_BUFFER].data.ptr || count_char + queue[PRINTF_OFFSETS].data.uia[1] > queue[PRINTF_STRING_BUFFER_SIZE].data.ui){
231  error = PRINTF_STRING_BUFFER_OVERFLOW;
232  }
233  else {
234  do {
235  // Suggest an offset and compete with other kernels for a spot.
236  // One kernel will make it through at a time. Attempt
237  // to win a portion of printf buffer and printf string buffer.
238  // Otherwise, update to latest offset values, and try again.
239  old_off.ull = queue[PRINTF_OFFSETS].data.al.load();
240  try_off.uia[0] = old_off.uia[0] + count_arg + 1;
241  try_off.uia[1] = old_off.uia[1] + count_char;
242  } while(!(queue[PRINTF_OFFSETS].data.al.compare_exchange_weak(old_off.ull, try_off.ull)));
243 
244  unsigned int poffset = (unsigned int)old_off.uia[0];
245  unsigned int soffset = (unsigned int)old_off.uia[1];
246 
247  if (poffset + count_arg + 1 > queue[PRINTF_BUFFER_SIZE].data.ui) {
248  error = PRINTF_BUFFER_OVERFLOW;
249  }
250  else if (soffset + count_char > queue[PRINTF_STRING_BUFFER_SIZE].data.ui){
251  error = PRINTF_STRING_BUFFER_OVERFLOW;
252  }
253  else {
254  if (set_batch(queue, poffset, soffset, count_arg, all...) != PRINTF_SUCCESS)
255  error = PRINTF_STRING_BUFFER_OVERFLOW;
256  }
257  }
258 
259  return error;
260 }
261 
262 
263 // The presence of hc::printf may impact performance even when it's not being called.
264 // Currently hcc's printf on accelerator is an opt-in feature. This means that users
265 // have to define HCC_ENABLE_ACCELERATOR_PRINTF to enable it.
266 #ifdef HCC_ENABLE_ACCELERATOR_PRINTF
267 
268 template <typename... All>
269 static inline PrintfError printf(const char* format_string, All... all) [[hc,cpu]] {
270  return printf(hc::printf_buffer, format_string, all...);
271 }
272 
273 #else
274 
275 // this is just a stubs for printf that doesn't do anything
276 template <typename... All>
277 static inline PrintfError printf(const char* format_string, All... all) [[hc,cpu]] {
278  return PRINTF_SUCCESS;
279 }
280 
281 #endif
282 
// Regexes for finding and classifying format string specifiers.
// Common shape: '%', optional flags [-+#0], optional width, optional
// ".precision", then the conversion character.
//  - The precision dot is escaped so that it matches a literal '.' only;
//    unescaped it matched any character (e.g. "%q10d" was taken as a specifier).
//  - pointerPattern accepts flags/width/precision like the other classes, so
//    that e.g. "%-10s" (accepted by specifierPattern) can also be classified.
static std::regex specifierPattern("(%){1}[-+#0]*[0-9]*((\\.)[0-9]+){0,1}([diuoxXfFeEgGaAcsp]){1}");
static std::regex signedIntegerPattern("(%){1}[-+#0]*[0-9]*((\\.)[0-9]+){0,1}([cdi]){1}");
static std::regex unsignedIntegerPattern("(%){1}[-+#0]*[0-9]*((\\.)[0-9]+){0,1}([uoxX]){1}");
static std::regex floatPattern("(%){1}[-+#0]*[0-9]*((\\.)[0-9]+){0,1}([fFeEgGaA]){1}");
static std::regex pointerPattern("(%){1}[-+#0]*[0-9]*((\\.)[0-9]+){0,1}([ps]){1}");
// Matches the escaped percent sign "%%" (the name is historical).
static std::regex doubleAmpersandPattern("(%){2}");
290 
291 static inline void processPrintfPackets(PrintfPacket* packets, const unsigned int numPackets) {
292 
293  for (unsigned int i = 0; i < numPackets; ) {
294 
295  unsigned int numPrintfArgs = packets[i++].data.ui;
296  if (numPrintfArgs == 0)
297  continue;
298 
299  // get the format
300  unsigned int formatStringIndex = i++;
301  assert(packets[formatStringIndex].type == PRINTF_CHAR_PTR
302  || packets[formatStringIndex].type == PRINTF_CONST_CHAR_PTR);
303  std::string formatString((const char*)packets[formatStringIndex].data.cptr);
304  std::smatch specifierMatches;
305 
306 #if HC_PRINTF_DEBUG
307  std::printf("%s:%d \t number of matches = %d\n", __FUNCTION__, __LINE__, (int)specifierMatches.size());
308 #endif
309 
310  for (unsigned int j = 1; j < numPrintfArgs; ++j, ++i) {
311 
312  if (!std::regex_search(formatString, specifierMatches, specifierPattern)) {
313  // More printf argument than format specifier??
314  // Just skip to the next printf request
315  break;
316  }
317 
318  std::string specifier = specifierMatches.str();
319 #if HC_PRINTF_DEBUG
320  std::cout << " (specifier found: " << specifier << ") ";
321 #endif
322 
323  // print the substring before the specifier
324  // clean up all the double ampersands
325  std::string prefix = specifierMatches.prefix();
326  prefix = std::regex_replace(prefix,doubleAmpersandPattern,"%");
327  std::printf("%s",prefix.c_str());
328 
329  std::smatch specifierTypeMatch;
330  if (std::regex_search(specifier, specifierTypeMatch, unsignedIntegerPattern)) {
331  std::printf(specifier.c_str(), packets[i].data.ui);
332  } else if (std::regex_search(specifier, specifierTypeMatch, signedIntegerPattern)) {
333  std::printf(specifier.c_str(), packets[i].data.i);
334  } else if (std::regex_search(specifier, specifierTypeMatch, floatPattern)) {
335  if (packets[i].type == PRINTF_FLOAT)
336  std::printf(specifier.c_str(), packets[i].data.f);
337  else
338  std::printf(specifier.c_str(), packets[i].data.d);
339  } else if (std::regex_search(specifier, specifierTypeMatch, pointerPattern)) {
340  std::printf(specifier.c_str(), packets[i].data.cptr);
341  }
342  else {
343  assert(false);
344  }
345  formatString = specifierMatches.suffix();
346  }
347  // print the substring after the last specifier
348  // clean up all the double ampersands before printing
349  formatString = std::regex_replace(formatString,doubleAmpersandPattern,"%");
350  std::printf("%s",formatString.c_str());
351  }
352  std::flush(std::cout);
353 }
354 
355 static inline void processPrintfBuffer(PrintfPacket* gpuBuffer) {
356 
357  if (gpuBuffer == nullptr) return;
358 
359  unsigned int cursor = gpuBuffer[PRINTF_OFFSETS].data.uia[0];
360 
361  // check whether the printf buffer is non-empty
362  if (cursor != PRINTF_HEADER_SIZE) {
363  unsigned int bufferSize = gpuBuffer[PRINTF_BUFFER_SIZE].data.ui;
364  unsigned int numPackets = ((bufferSize<cursor)?bufferSize:cursor) - PRINTF_HEADER_SIZE;
365 
366  processPrintfPackets(gpuBuffer+PRINTF_HEADER_SIZE, numPackets);
367 
368  // reset the printf buffer and string buffer
369  gpuBuffer[PRINTF_OFFSETS].data.uia[0] = PRINTF_HEADER_SIZE;
370  gpuBuffer[PRINTF_OFFSETS].data.uia[1] = 0;
371  }
372 }
373 
374 
375 } // namespace hc
Definition: hc_printf.hpp:29
Parallel algorithms.
am_status_t am_free(void *ptr)
Free a block of memory previously allocated with am_alloc.
Heterogeneous C++ (HC) namespace.
Definition: grid_launch.h:10
Definition: hc_printf.hpp:71