|
uint64_t | hc::get_system_ticks () |
| Get the current tick count for the GPU platform. More...
|
|
uint64_t | hc::get_tick_frequency () |
| Get the frequency of ticks per second for the underlying asynchronous operation. More...
|
|
unsigned int | hc::__wavesize () __HC__ |
|
unsigned int | hc::__popcount_u32_b32 (unsigned int input) __HC__ |
| Count number of 1 bits in the input. More...
|
|
unsigned int | hc::__popcount_u32_b64 (unsigned long long int input) __HC__ |
| Count number of 1 bits in the input. More...
|
|
unsigned int | hc::__firstbit_u32_u32 (unsigned int input) __HC__ |
| Count leading zero bits in the input. More...
|
|
unsigned int | hc::__firstbit_u32_u64 (unsigned long long int input) __HC__ |
| Count leading zero bits in the input. More...
|
|
unsigned int | hc::__firstbit_u32_s32 (int input) __HC__ |
| Count leading zero bits in the input. More...
|
|
unsigned int | hc::__firstbit_u32_s64 (long long int input) __HC__ |
| Count leading zero bits in the input. More...
|
|
unsigned int | hc::__bitalign_b32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Align 32 bits within 64 bits of data on an arbitrary bit boundary. More...
|
|
unsigned int | hc::__bytealign_b32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Align 32 bits within 64 bits of data on an arbitrary byte boundary. More...
|
|
unsigned int | hc::__lerp_u8x4 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Do linear interpolation and compute the unsigned 8-bit average of packed data. More...
|
|
unsigned int | hc::__packcvt_u8x4_f32 (float src0, float src1, float src2, float src3) __HC__ |
| Takes four floating-point numbers, converts them to unsigned integer values, and packs them into a packed u8x4 value. More...
|
|
float | hc::__unpackcvt_f32_u8x4 (unsigned int src0, unsigned int src1) __HC__ |
| Unpacks a single element from a packed u8x4 value and converts it to an f32. More...
|
|
unsigned int | hc::__sadhi_u16x2_u8x4 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| This function is mostly the same as sad except the sum of absolute differences is added to the most significant 16 bits of the result. More...
|
|
uint64_t | hc::__clock_u64 () __HC__ |
| Get system timestamp.
|
|
uint64_t | hc::__cycle_u64 () __HC__ |
| Get hardware cycle count. More...
|
|
unsigned int | hc::__activelaneid_u32 () __HC__ |
| Get the count of the number of earlier (in flattened work-item order) active work-items within the same wavefront. More...
|
|
uint64_t | hc::__activelanemask_v4_b64_b1 (unsigned int input) __HC__ |
| Return a bit mask that shows which active work-items in the wavefront have a non-zero input. More...
|
|
unsigned int | hc::__activelanecount_u32_b1 (unsigned int input) __HC__ |
| Count the number of active work-items in the current wavefront that have a non-zero input. More...
|
|
int | hc::__any (int predicate) __HC__ |
| Evaluate predicate for all active work-items in the wavefront and return non-zero if and only if predicate evaluates to non-zero for any of them.
|
|
int | hc::__all (int predicate) __HC__ |
| Evaluate predicate for all active work-items in the wavefront and return non-zero if and only if predicate evaluates to non-zero for all of them.
|
|
uint64_t | hc::__ballot (int predicate) __HC__ |
| Evaluate predicate for all active work-items in the wavefront and return an integer whose Nth bit is set if and only if predicate evaluates to non-zero for the Nth work-item of the wavefront and the Nth work-item is active.
|
|
unsigned int | hc::__shfl_xor (unsigned int var, int laneMask, int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
|
unsigned int | hc::__mul24 (unsigned int x, unsigned int y)[[hc]] |
| Multiply two unsigned integers (x,y) but only the lower 24 bits will be used in the multiplication. More...
|
|
int | hc::__mul24 (int x, int y)[[hc]] |
| Multiply two integers (x,y) but only the lower 24 bits will be used in the multiplication. More...
|
|
unsigned int | hc::__mad24 (unsigned int x, unsigned int y, unsigned int z)[[hc]] |
| Multiply two unsigned integers (x,y) but only the lower 24 bits will be used in the multiplication and then add the product to a 32-bit unsigned integer. More...
|
|
int | hc::__mad24 (int x, int y, int z)[[hc]] |
| Multiply two integers (x,y) but only the lower 24 bits will be used in the multiplication and then add the product to a 32-bit integer. More...
|
|
void | hc::abort () __HC__ |
|
unsigned int | hc::get_group_segment_size () __HC__ |
| Fetch the size of group segment. More...
|
|
unsigned int | hc::get_static_group_segment_size () __HC__ |
| Fetch the size of static group segment. More...
|
|
void * | hc::get_group_segment_base_pointer () __HC__ |
| Fetch the address of the beginning of group segment.
|
|
void * | hc::get_dynamic_group_segment_base_pointer () __HC__ |
| Fetch the address of the beginning of dynamic group segment.
|
|
void | hc::all_memory_fence (const tile_barrier &) __HC__ |
| Establishes a thread-tile scoped memory fence for both global and tile-static memory operations. More...
|
|
void | hc::global_memory_fence (const tile_barrier &) __HC__ |
| Establishes a thread-tile scoped memory fence for global (but not tile-static) memory operations. More...
|
|
void | hc::tile_static_memory_fence (const tile_barrier &) __HC__ |
| Establishes a thread-tile scoped memory fence for tile-static (but not global) memory operations. More...
|
|
template<int N> |
const extent< N > & | hc::check (const extent< N > &ext) |
|
template<typename T , int N> |
void | hc::copy (const array< T, N > &src, array< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename OutputIter , typename T , int N> |
void | hc::copy (const array_view< T, N > &src, OutputIter destBegin) |
| The contents of a source array are copied into "dest" starting with iterator destBegin. More...
|
|
template<typename OutputIter , typename T , int N> |
void | hc::copy (const array< T, N > &src, OutputIter destBegin) |
| The contents of a source array are copied into "dest" starting with iterator destBegin. More...
|
|
template<typename T , int N> |
completion_future | hc::copy_async (const array< T, N > &src, array< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename T , int N> |
completion_future | hc::copy_async (const array< T, N > &src, const array_view< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename OutputIter , typename T , int N> |
completion_future | hc::copy_async (const array< T, N > &src, OutputIter destBegin) |
| The contents of a source array are copied into "dest" starting with iterator destBegin. More...
|
|
template<typename OutputIter , typename T , int N> |
completion_future | hc::copy_async (const array_view< T, N > &src, OutputIter destBegin) |
| The contents of a source array are copied into "dest" starting with iterator destBegin. More...
|
|
template<typename T , int N> |
completion_future | hc::copy_async (const array< T, N > &src, const array< T, N > &dest) |
|
template<typename T , int N> |
completion_future | hc::copy_async (const array_view< const T, N > &src, const array< T, N > &dest) |
|
template<typename T , int N> |
completion_future | hc::copy_async (const array_view< T, N > &src, const array< T, N > &dest) |
|
unsigned int | hc::__atomic_wrapinc (unsigned int *address, unsigned int val) __HC__ |
| Atomically do the following operations: More...
|
|
unsigned int | hc::__atomic_wrapdec (unsigned int *address, unsigned int val) __HC__ |
| Atomically do the following operations: More...
|
|
template<int N, typename Kernel > |
completion_future | hc::parallel_for_each (const accelerator_view &, const extent< N > &, const Kernel &) |
|
template<typename Kernel > |
completion_future | hc::parallel_for_each (const accelerator_view &, const tiled_extent< 3 > &, const Kernel &) |
|
template<typename Kernel > |
completion_future | hc::parallel_for_each (const accelerator_view &, const tiled_extent< 2 > &, const Kernel &) |
|
template<typename Kernel > |
completion_future | hc::parallel_for_each (const accelerator_view &, const tiled_extent< 1 > &, const Kernel &) |
|
template<int N, typename Kernel > |
completion_future | hc::parallel_for_each (const extent< N > &compute_domain, const Kernel &f) |
|
template<typename Kernel > |
completion_future | hc::parallel_for_each (const tiled_extent< 3 > &compute_domain, const Kernel &f) |
|
template<typename Kernel > |
completion_future | hc::parallel_for_each (const tiled_extent< 2 > &compute_domain, const Kernel &f) |
|
template<typename Kernel > |
completion_future | hc::parallel_for_each (const tiled_extent< 1 > &compute_domain, const Kernel &f) |
|
template<int N, typename Kernel > |
| hc::__attribute__ ((noinline, used)) completion_future parallel_for_each(const accelerator_view &av |
|
| hc::if (av.get_accelerator().get_device_path()==L"cpu") |
|
return | hc::completion_future (Kalmar::mcw_cxxamp_launch_kernel_async< pfe_wrapper< N, Kernel >, 3 >(av.pQueue, ext, NULL, _pf)) |
|
template<typename Kernel > |
| hc::__attribute__ ((noinline, used)) completion_future parallel_for_each(const accelerator_view &av |
|
| hc::if (compute_domain[0]< 0) |
|
| hc::if (static_cast< size_t >(compute_domain[0]) > 4294967295L) throw invalid_compute_domain("Extent size too large.") |
|
return | hc::completion_future (Kalmar::mcw_cxxamp_launch_kernel_async< Kernel, 1 >(av.pQueue,&ext, NULL, f)) |
|
| hc::if (static_cast< size_t >(compute_domain[1]) > 4294967295L) throw invalid_compute_domain("Extent size too large.") |
|
return | hc::completion_future (Kalmar::mcw_cxxamp_launch_kernel_async< Kernel, 2 >(av.pQueue, ext, NULL, f)) |
|
| hc::if (static_cast< size_t >(compute_domain[2]) > 4294967295L) throw invalid_compute_domain("Extent size too large.") |
|
return | hc::completion_future (Kalmar::mcw_cxxamp_launch_kernel_async< Kernel, 3 >(av.pQueue, ext, NULL, f)) |
|
return | hc::completion_future (Kalmar::mcw_cxxamp_execute_kernel_with_dynamic_group_memory_async< Kernel, 1 >(av.pQueue,&ext,&tile, f, kernel, compute_domain.get_dynamic_group_segment_size())) |
|
return | hc::completion_future (Kalmar::mcw_cxxamp_execute_kernel_with_dynamic_group_memory_async< Kernel, 2 >(av.pQueue, ext, tile, f, kernel, compute_domain.get_dynamic_group_segment_size())) |
|
return | hc::completion_future (Kalmar::mcw_cxxamp_execute_kernel_with_dynamic_group_memory_async< Kernel, 3 >(av.pQueue, ext, tile, f, kernel, compute_domain.get_dynamic_group_segment_size())) |
|
|
template<int N> |
extent< N > | hc::operator+ (const extent< N > &lhs, const extent< N > &rhs) __CPU__ __HC__ |
| Adds (or subtracts) two objects of extent<N> to form a new extent. More...
|
|
template<int N> |
extent< N > | hc::operator- (const extent< N > &lhs, const extent< N > &rhs) __CPU__ __HC__ |
| Adds (or subtracts) two objects of extent<N> to form a new extent. More...
|
|
|
template<int N> |
extent< N > | hc::operator+ (const extent< N > &ext, int value) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
template<int N> |
extent< N > | hc::operator+ (int value, const extent< N > &ext) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
template<int N> |
extent< N > | hc::operator- (const extent< N > &ext, int value) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
template<int N> |
extent< N > | hc::operator- (int value, const extent< N > &ext) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
template<int N> |
extent< N > | hc::operator* (const extent< N > &ext, int value) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
template<int N> |
extent< N > | hc::operator* (int value, const extent< N > &ext) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
template<int N> |
extent< N > | hc::operator/ (const extent< N > &ext, int value) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
template<int N> |
extent< N > | hc::operator/ (int value, const extent< N > &ext) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
template<int N> |
extent< N > | hc::operator% (const extent< N > &ext, int value) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
template<int N> |
extent< N > | hc::operator% (int value, const extent< N > &ext) __CPU__ __HC__ |
| Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...
|
|
|
unsigned int | hc::__bitextract_u32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Extract a range of bits. More...
|
|
uint64_t | hc::__bitextract_u64 (uint64_t src0, unsigned int src1, unsigned int src2) __HC__ |
| Extract a range of bits. More...
|
|
int | hc::__bitextract_s32 (int src0, unsigned int src1, unsigned int src2) __HC__ |
| Extract a range of bits. More...
|
|
int64_t | hc::__bitextract_s64 (int64_t src0, unsigned int src1, unsigned int src2) __HC__ |
| Extract a range of bits. More...
|
|
|
unsigned int | hc::__bitinsert_u32 (unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) __HC__ |
| Replace a range of bits. More...
|
|
uint64_t | hc::__bitinsert_u64 (uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) __HC__ |
| Replace a range of bits. More...
|
|
int | hc::__bitinsert_s32 (int src0, int src1, unsigned int src2, unsigned int src3) __HC__ |
| Replace a range of bits. More...
|
|
int64_t | hc::__bitinsert_s64 (int64_t src0, int64_t src1, unsigned int src2, unsigned int src3) __HC__ |
| Replace a range of bits. More...
|
|
|
unsigned int | hc::__bitmask_b32 (unsigned int src0, unsigned int src1) __HC__ |
| Create a bit mask that can be used with bitselect. More...
|
|
uint64_t | hc::__bitmask_b64 (unsigned int src0, unsigned int src1) __HC__ |
| Create a bit mask that can be used with bitselect. More...
|
|
|
unsigned int | hc::__bitrev_b32 (unsigned int src0)[[hc]] __asm("llvm.bitreverse.i32") |
| Reverse the bits. More...
|
|
uint64_t | hc::__bitrev_b64 (uint64_t src0)[[hc]] __asm("llvm.bitreverse.i64") |
| Reverse the bits. More...
|
|
|
unsigned int | hc::__bitselect_b32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Do bit field selection. More...
|
|
uint64_t | hc::__bitselect_b64 (uint64_t src0, uint64_t src1, uint64_t src2) __HC__ |
| Do bit field selection. More...
|
|
|
unsigned int | hc::__lastbit_u32_u32 (unsigned int input) __HC__ |
| Find the first bit set to 1 in a number starting from the least significant bit. More...
|
|
unsigned int | hc::__lastbit_u32_u64 (unsigned long long int input) __HC__ |
| Find the first bit set to 1 in a number starting from the least significant bit. More...
|
|
unsigned int | hc::__lastbit_u32_s32 (int input) __HC__ |
| Find the first bit set to 1 in a number starting from the least significant bit. More...
|
|
unsigned int | hc::__lastbit_u32_s64 (unsigned long long input) __HC__ |
| Find the first bit set to 1 in a number starting from the least significant bit. More...
|
|
|
unsigned int | hc::__unpacklo_u8x4 (unsigned int src0, unsigned int src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
uint64_t | hc::__unpacklo_u8x8 (uint64_t src0, uint64_t src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
unsigned int | hc::__unpacklo_u16x2 (unsigned int src0, unsigned int src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
uint64_t | hc::__unpacklo_u16x4 (uint64_t src0, uint64_t src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
uint64_t | hc::__unpacklo_u32x2 (uint64_t src0, uint64_t src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
int | hc::__unpacklo_s8x4 (int src0, int src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
int64_t | hc::__unpacklo_s8x8 (int64_t src0, int64_t src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
int | hc::__unpacklo_s16x2 (int src0, int src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
int64_t | hc::__unpacklo_s16x4 (int64_t src0, int64_t src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
int64_t | hc::__unpacklo_s32x2 (int64_t src0, int64_t src1) __HC__ |
| Copy and interleave the lower half of the elements from each source into the destination. More...
|
|
|
unsigned int | hc::__unpackhi_u8x4 (unsigned int src0, unsigned int src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
uint64_t | hc::__unpackhi_u8x8 (uint64_t src0, uint64_t src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
unsigned int | hc::__unpackhi_u16x2 (unsigned int src0, unsigned int src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
uint64_t | hc::__unpackhi_u16x4 (uint64_t src0, uint64_t src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
uint64_t | hc::__unpackhi_u32x2 (uint64_t src0, uint64_t src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
int | hc::__unpackhi_s8x4 (int src0, int src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
int64_t | hc::__unpackhi_s8x8 (int64_t src0, int64_t src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
int | hc::__unpackhi_s16x2 (int src0, int src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
int64_t | hc::__unpackhi_s16x4 (int64_t src0, int64_t src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
int64_t | hc::__unpackhi_s32x2 (int64_t src0, int64_t src1) __HC__ |
| Copy and interleave the upper half of the elements from each source into the destination. More...
|
|
|
unsigned int | hc::__pack_u8x4_u32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
uint64_t | hc::__pack_u8x8_u32 (uint64_t src0, unsigned int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
unsigned | hc::__pack_u16x2_u32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
uint64_t | hc::__pack_u16x4_u32 (uint64_t src0, unsigned int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
uint64_t | hc::__pack_u32x2_u32 (uint64_t src0, unsigned int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
int | hc::__pack_s8x4_s32 (int src0, int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
int64_t | hc::__pack_s8x8_s32 (int64_t src0, int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
int | hc::__pack_s16x2_s32 (int src0, int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
int64_t | hc::__pack_s16x4_s32 (int64_t src0, int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
int64_t | hc::__pack_s32x2_s32 (int64_t src0, int src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
double | hc::__pack_f32x2_f32 (double src0, float src1, unsigned int src2) __HC__ |
| Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...
|
|
|
unsigned int | hc::__unpack_u32_u8x4 (unsigned int src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
unsigned int | hc::__unpack_u32_u8x8 (uint64_t src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
unsigned int | hc::__unpack_u32_u16x2 (unsigned int src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
unsigned int | hc::__unpack_u32_u16x4 (uint64_t src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
unsigned int | hc::__unpack_u32_u32x2 (uint64_t src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
int | hc::__unpack_s32_s8x4 (int src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
int | hc::__unpack_s32_s8x8 (int64_t src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
int | hc::__unpack_s32_s16x2 (int src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
int | hc::__unpack_s32_s16x4 (int64_t src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
int | hc::__unpack_s32_s3x2 (int64_t src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
float | hc::__unpack_f32_f32x2 (double src0, unsigned int src1) __HC__ |
| Assign the elements specified by src1 from the packed value in src0. More...
|
|
|
unsigned int | hc::__sad_u32_u32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Computes the sum of the absolute differences of src0 and src1 and then adds src2 to the result. More...
|
|
unsigned int | hc::__sad_u32_u16x2 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Computes the sum of the absolute differences of src0 and src1 and then adds src2 to the result. More...
|
|
unsigned int | hc::__sad_u32_u8x4 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__ |
| Computes the sum of the absolute differences of src0 and src1 and then adds src2 to the result. More...
|
|
|
int | hc::__amdgcn_mbcnt_lo (int mask, int src)[[hc]] __asm("llvm.amdgcn.mbcnt.lo") |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__amdgcn_mbcnt_hi (int mask, int src)[[hc]] __asm("llvm.amdgcn.mbcnt.hi") |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__lane_id (void)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__amdgcn_ds_bpermute (int index, int src)[[hc]] __asm("llvm.amdgcn.ds.bpermute") |
| ds_bpermute intrinsic FIXME: We need to add __builtin_amdgcn_ds_bpermute to clang and call it here instead.
|
|
unsigned int | hc::__amdgcn_ds_bpermute (int index, unsigned int src)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
float | hc::__amdgcn_ds_bpermute (int index, float src)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__amdgcn_ds_permute (int index, int src)[[hc]] |
| ds_permute intrinsic
|
|
unsigned int | hc::__amdgcn_ds_permute (int index, unsigned int src)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
float | hc::__amdgcn_ds_permute (int index, float src)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__amdgcn_ds_swizzle (int src, int pattern)[[hc]] |
| ds_swizzle intrinsic
|
|
unsigned int | hc::__amdgcn_ds_swizzle (unsigned int src, int pattern)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
float | hc::__amdgcn_ds_swizzle (float src, int pattern)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__amdgcn_move_dpp (int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl)[[hc]] |
| move DPP intrinsic
|
|
int | hc::__amdgcn_wave_sr1 (int src, bool bound_ctrl)[[hc]] |
| Shift the value of src to the right by one thread within a wavefront. More...
|
|
unsigned int | hc::__amdgcn_wave_sr1 (unsigned int src, bool bound_ctrl)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
float | hc::__amdgcn_wave_sr1 (float src, bool bound_ctrl)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__amdgcn_wave_sl1 (int src, bool bound_ctrl)[[hc]] |
| Shift the value of src to the left by one thread within a wavefront. More...
|
|
unsigned int | hc::__amdgcn_wave_sl1 (unsigned int src, bool bound_ctrl)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
float | hc::__amdgcn_wave_sl1 (float src, bool bound_ctrl)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__amdgcn_wave_rr1 (int src)[[hc]] |
| Rotate the value of src to the right by one thread within a wavefront. More...
|
|
unsigned int | hc::__amdgcn_wave_rr1 (unsigned int src)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
float | hc::__amdgcn_wave_rr1 (float src)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__amdgcn_wave_rl1 (int src)[[hc]] |
| Rotate the value of src to the left by one thread within a wavefront. More...
|
|
unsigned int | hc::__amdgcn_wave_rl1 (unsigned int src)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
float | hc::__amdgcn_wave_rl1 (float src)[[hc]] |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
int | hc::__shfl (int var, int srcLane, int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
unsigned int | hc::__shfl (unsigned int var, int srcLane, int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
float | hc::__shfl (float var, int srcLane, int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Direct copy from indexed active work-item within a wavefront. More...
|
|
|
int | hc::__shfl_up (int var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Copy from an active work-item with lower ID relative to caller within a wavefront. More...
|
|
unsigned int | hc::__shfl_up (unsigned int var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Copy from an active work-item with lower ID relative to caller within a wavefront. More...
|
|
float | hc::__shfl_up (float var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Copy from an active work-item with lower ID relative to caller within a wavefront. More...
|
|
|
int | hc::__shfl_down (int var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Copy from an active work-item with higher ID relative to caller within a wavefront. More...
|
|
unsigned int | hc::__shfl_down (unsigned int var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Copy from an active work-item with higher ID relative to caller within a wavefront. More...
|
|
float | hc::__shfl_down (float var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Copy from an active work-item with higher ID relative to caller within a wavefront. More...
|
|
|
int | hc::__shfl_xor (int var, int laneMask, int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Copy from an active work-item based on bitwise XOR of caller work-item ID within a wavefront. More...
|
|
float | hc::__shfl_xor (float var, int laneMask, int width=__HSA_WAVEFRONT_SIZE__) __HC__ |
| Copy from an active work-item based on bitwise XOR of caller work-item ID within a wavefront. More...
|
|
|
template<typename T , int N> |
void | hc::copy (const array_view< const T, N > &src, const array_view< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename T , int N> |
void | hc::copy (const array_view< T, N > &src, const array_view< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename T > |
void | hc::copy (const array_view< const T, 1 > &src, const array_view< T, 1 > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
|
template<typename T , int N> |
void | hc::copy (const array< T, N > &src, const array_view< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename T > |
void | hc::copy (const array< T, 1 > &src, const array_view< T, 1 > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
|
template<typename T , int N> |
void | hc::copy (const array_view< const T, N > &src, array< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename T , int N> |
void | hc::copy (const array_view< T, N > &src, array< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename T > |
void | hc::copy (const array_view< const T, 1 > &src, array< T, 1 > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
|
template<typename InputIter , typename T , int N> |
void | hc::copy (InputIter srcBegin, InputIter srcEnd, const array_view< T, N > &dest) |
| The contents of a source container from the iterator range [srcBegin,srcEnd) are copied into "dest". More...
|
|
template<typename InputIter , typename T , int N> |
void | hc::copy (InputIter srcBegin, const array_view< T, N > &dest) |
| The contents of a source container, starting at the iterator srcBegin (this overload takes no srcEnd), are copied into "dest". More...
|
|
|
template<typename InputIter , typename T , int N> |
void | hc::copy (InputIter srcBegin, InputIter srcEnd, array< T, N > &dest) |
| The contents of a source container from the iterator range [srcBegin,srcEnd) are copied into "dest". More...
|
|
template<typename InputIter , typename T , int N> |
void | hc::copy (InputIter srcBegin, array< T, N > &dest) |
| The contents of a source container, starting at the iterator srcBegin (this overload takes no srcEnd), are copied into "dest". More...
|
|
|
template<typename T , int N> |
completion_future | hc::copy_async (const array_view< const T, N > &src, array< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename T , int N> |
completion_future | hc::copy_async (const array_view< T, N > &src, array< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
|
template<typename T , int N> |
completion_future | hc::copy_async (const array_view< const T, N > &src, const array_view< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
template<typename T , int N> |
completion_future | hc::copy_async (const array_view< T, N > &src, const array_view< T, N > &dest) |
| The contents of "src" are copied into "dest". More...
|
|
|
template<typename InputIter , typename T , int N> |
completion_future | hc::copy_async (InputIter srcBegin, InputIter srcEnd, array< T, N > &dest) |
| The contents of a source container from the iterator range [srcBegin,srcEnd) are copied into "dest". More...
|
|
template<typename InputIter , typename T , int N> |
completion_future | hc::copy_async (InputIter srcBegin, array< T, N > &dest) |
| The contents of a source container, starting at the iterator srcBegin (this overload takes no srcEnd), are copied into "dest". More...
|
|
|
template<typename InputIter , typename T , int N> |
completion_future | hc::copy_async (InputIter srcBegin, InputIter srcEnd, const array_view< T, N > &dest) |
| The contents of a source container from the iterator range [srcBegin,srcEnd) are copied into "dest". More...
|
|
template<typename InputIter , typename T , int N> |
completion_future | hc::copy_async (InputIter srcBegin, const array_view< T, N > &dest) |
| The contents of a source container, starting at the iterator srcBegin (this overload takes no srcEnd), are copied into "dest". More...
|
|
|
unsigned int | hc::atomic_exchange (unsigned int *dest, unsigned int val) __CPU__ __HC__ |
| Atomically read the value stored in dest, replace it with the value given in val and return the old value to the caller. More...
|
|
int | hc::atomic_exchange (int *dest, int val) __CPU__ __HC__ |
| Atomically read the value stored in dest, replace it with the value given in val and return the old value to the caller. More...
|
|
float | hc::atomic_exchange (float *dest, float val) __CPU__ __HC__ |
| Atomically read the value stored in dest, replace it with the value given in val and return the old value to the caller. More...
|
|
uint64_t | hc::atomic_exchange (uint64_t *dest, uint64_t val) __CPU__ __HC__ |
| Atomically read the value stored in dest, replace it with the value given in val and return the old value to the caller. More...
|
|
|
bool | hc::atomic_compare_exchange (unsigned int *dest, unsigned int *expected_val, unsigned int val) __CPU__ __HC__ |
| These functions attempt to perform these three steps atomically: More...
|
|
bool | hc::atomic_compare_exchange (int *dest, int *expected_val, int val) __CPU__ __HC__ |
| These functions attempt to perform these three steps atomically: More...
|
|
bool | hc::atomic_compare_exchange (uint64_t *dest, uint64_t *expected_val, uint64_t val) __CPU__ __HC__ |
| These functions attempt to perform these three steps atomically: More...
|
|
|
unsigned | hc::atomic_fetch_add (unsigned *x, unsigned y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
int | hc::atomic_fetch_add (int *x, int y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
float | hc::atomic_fetch_add (float *x, float y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
uint64_t | hc::atomic_fetch_add (uint64_t *x, uint64_t y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
unsigned | hc::atomic_fetch_sub (unsigned *x, unsigned y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
int | hc::atomic_fetch_sub (int *x, int y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
float | hc::atomic_fetch_sub (float *x, float y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
unsigned | hc::atomic_fetch_and (unsigned *x, unsigned y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
int | hc::atomic_fetch_and (int *x, int y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
uint64_t | hc::atomic_fetch_and (uint64_t *x, uint64_t y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
unsigned | hc::atomic_fetch_or (unsigned *x, unsigned y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
int | hc::atomic_fetch_or (int *x, int y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
uint64_t | hc::atomic_fetch_or (uint64_t *x, uint64_t y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
unsigned | hc::atomic_fetch_xor (unsigned *x, unsigned y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
int | hc::atomic_fetch_xor (int *x, int y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
uint64_t | hc::atomic_fetch_xor (uint64_t *x, uint64_t y) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
int | hc::atomic_fetch_max (int *dest, int val) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
unsigned int | hc::atomic_fetch_max (unsigned int *dest, unsigned int val) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
uint64_t | hc::atomic_fetch_max (uint64_t *dest, uint64_t val) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
int | hc::atomic_fetch_min (int *dest, int val) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
unsigned int | hc::atomic_fetch_min (unsigned int *dest, unsigned int val) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
uint64_t | hc::atomic_fetch_min (uint64_t *dest, uint64_t val) __CPU__ __HC__ |
| Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed to by dest. More...
|
|
|
int | hc::atomic_fetch_inc (int *_Dest) __CPU__ __HC__ |
| Atomically increment or decrement the value stored at the location pointed to by dest. More...
|
|
unsigned int | hc::atomic_fetch_inc (unsigned int *_Dest) __CPU__ __HC__ |
| Atomically increment or decrement the value stored at the location pointed to by dest. More...
|
|
int | hc::atomic_fetch_dec (int *_Dest) __CPU__ __HC__ |
| Atomically increment or decrement the value stored at the location pointed to by dest. More...
|
|
unsigned int | hc::atomic_fetch_dec (unsigned int *_Dest) __CPU__ __HC__ |
| Atomically increment or decrement the value stored at the location pointed to by dest. More...
|
|