Heterogeneous C++ (HC) API. More...

#include "hc_defines.h"
#include "kalmar_exception.h"
#include "kalmar_index.h"
#include "kalmar_runtime.h"
#include "kalmar_serialize.h"
#include "kalmar_launch.h"
#include "kalmar_buffer.h"
#include "kalmar_math.h"
#include "hsa_atomic.h"
#include "kalmar_cpu_launch.h"
#include "hcc_features.hpp"

Include dependency graph for hc.hpp:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes
class	hc::extent< N >
	Represents the bounds (the size in each dimension) of an N-dimensional space. More...

class	hc::tiled_extent< N >
	Represents an extent subdivided into tiles. More...

class	hc::array_view< T, N >
	The array_view<T,N> type represents a possibly cached view into the data held in an array<T,N>, or a section thereof. More...

class	hc::array< T, N >
	Represents an N-dimensional region of memory (with type T) located on an accelerator. More...

class	hc::accelerator_view
	Represents a logical (isolated) accelerator view of a compute accelerator. More...

class	hc::accelerator
	Represents a physical accelerated computing device. More...

class	hc::completion_future
	This class is the return type of all asynchronous APIs and has an interface analogous to std::shared_future<void>. More...

class	hc::extent< N >
	Represents the bounds (the size in each dimension) of an N-dimensional space. More...

class	hc::tiled_extent< N >
	Represents an extent subdivided into tiles. More...

class	hc::tiled_extent< 1 >
	Represents an extent subdivided into tiles. More...

class	hc::tiled_extent< 2 >
	Represents an extent subdivided into tiles. More...

class	hc::tiled_extent< 3 >
	Represents an extent subdivided into tiles. More...

union	hc::__u

class	hc::tile_barrier
	The tile_barrier class is a capability class that is only creatable by the system, and passed to a tiled parallel_for_each function object as part of the tiled_index parameter. More...

class	hc::tiled_index< N >
	Represents a set of related indices subdivided into 1-, 2-, or 3-dimensional tiles. More...

class	hc::tiled_index< 1 >
	Represents a set of related indices subdivided into 1-, 2-, or 3-dimensional tiles. More...

class	hc::tiled_index< 2 >
	Represents a set of related indices subdivided into 1-, 2-, or 3-dimensional tiles. More...

struct	hc::projection_helper< T, N >

struct	hc::projection_helper< T, 1 >

struct	hc::projection_helper< const T, N >

struct	hc::projection_helper< const T, 1 >

struct	hc::__has_data< T >

struct	hc::__has_size< T >

struct	hc::__is_container< T >

struct	hc::array_projection_helper< T, N >

struct	hc::array_projection_helper< T, 1 >

class	hc::array< T, N >
	Represents an N-dimensional region of memory (with type T) located on an accelerator. More...

class	hc::array_view< T, N >
	The array_view<T,N> type represents a possibly cached view into the data held in an array<T,N>, or a section thereof. More...

class	hc::array_view< const T, N >
	The partial specialization array_view<const T,N> represents a view over elements of type const T with rank N. More...

struct	hc::copy_input< InputIter, T, N, dim >

struct	hc::copy_input< InputIter, T, N, N >

struct	hc::copy_output< OutputIter, T, N, dim >

struct	hc::copy_output< OutputIter, T, N, N >

struct	hc::copy_bidir< T, N, dim >

struct	hc::copy_bidir< T, N, N >

struct	hc::do_copy< Iter, T, N >

struct	hc::do_copy< Iter, T, 1 >

struct	hc::do_copy< T *, T, N >

struct	hc::do_copy< T *, T, 1 >

struct	hc::pfe_helper< N, Kernel, _Tp >

struct	hc::pfe_helper< 0, Kernel, _Tp >

class	hc::pfe_wrapper< N, Kernel >

Namespaces
	hc
	Heterogeneous C++ (HC) namespace.

	Kalmar
	namespace for internal classes of Kalmar compiler / runtime

Macros
#define	__HC__ [[hc]]

#define	__CPU__ [[cpu]]

#define	GET_SYMBOL_ADDRESS(acc, symbol) acc.get_symbol_address( #symbol );

#define	__HSA_WAVEFRONT_SIZE__ (64)
	Fetch the size of a wavefront. More...

Typedefs
typedef struct hsa_kernel_dispatch_packet_s	hsa_kernel_dispatch_packet_t

template<int N>
using	hc::index = Kalmar::index< N >
	Represents a unique position in N-dimensional space.

using	hc::runtime_exception = Kalmar::runtime_exception

using	hc::invalid_compute_domain = Kalmar::invalid_compute_domain

using	hc::accelerator_view_removed = Kalmar::accelerator_view_removed

Functions
uint64_t	hc::get_system_ticks ()
	Get the current tick count for the GPU platform. More...

uint64_t	hc::get_tick_frequency ()
	Get the frequency of ticks per second for the underlying asynchronous operation. More...

unsigned int	hc::__wavesize () __HC__

unsigned int	hc::__popcount_u32_b32 (unsigned int input) __HC__
	Count number of 1 bits in the input. More...

unsigned int	hc::__popcount_u32_b64 (unsigned long long int input) __HC__
	Count number of 1 bits in the input. More...

unsigned int	hc::__firstbit_u32_u32 (unsigned int input) __HC__
	Count leading zero bits in the input. More...

unsigned int	hc::__firstbit_u32_u64 (unsigned long long int input) __HC__
	Count leading zero bits in the input. More...

unsigned int	hc::__firstbit_u32_s32 (int input) __HC__
	Count leading zero bits in the input. More...

unsigned int	hc::__firstbit_u32_s64 (long long int input) __HC__
	Count leading zero bits in the input. More...

unsigned int	hc::__bitalign_b32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Align 32 bits within 64 bits of data on an arbitrary bit boundary. More...

unsigned int	hc::__bytealign_b32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Align 32 bits within 64 bits of data on an arbitrary byte boundary. More...

unsigned int	hc::__lerp_u8x4 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Do linear interpolation and compute the unsigned 8-bit average of packed data. More...

unsigned int	hc::__packcvt_u8x4_f32 (float src0, float src1, float src2, float src3) __HC__
	Takes four floating-point numbers, converts them to unsigned integer values, and packs them into a packed u8x4 value. More...

float	hc::__unpackcvt_f32_u8x4 (unsigned int src0, unsigned int src1) __HC__
	Unpacks a single element from a packed u8x4 value and converts it to an f32. More...

unsigned int	hc::__sadhi_u16x2_u8x4 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	This function is mostly the same as sad except the sum of absolute differences is added to the most significant 16 bits of the result. More...

uint64_t	hc::__clock_u64 () __HC__
	Get system timestamp.

uint64_t	hc::__cycle_u64 () __HC__
	Get hardware cycle count. More...

unsigned int	hc::__activelaneid_u32 () __HC__
	Get the count of the number of earlier (in flattened work-item order) active work-items within the same wavefront. More...

uint64_t	hc::__activelanemask_v4_b64_b1 (unsigned int input) __HC__
	Return a bit mask that shows which active work-items in the wavefront have a non-zero input. More...

unsigned int	hc::__activelanecount_u32_b1 (unsigned int input) __HC__
	Count the number of active work-items in the current wavefront that have a non-zero input. More...

int	hc::__any (int predicate) __HC__
	Evaluate predicate for all active work-items in the wavefront and return non-zero if and only if predicate evaluates to non-zero for any of them.

int	hc::__all (int predicate) __HC__
	Evaluate predicate for all active work-items in the wavefront and return non-zero if and only if predicate evaluates to non-zero for all of them.

uint64_t	hc::__ballot (int predicate) __HC__
	Evaluate predicate for all active work-items in the wavefront and return an integer whose Nth bit is set if and only if predicate evaluates to non-zero for the Nth work-item of the wavefront and the Nth work-item is active.

unsigned int	hc::__shfl_xor (unsigned int var, int laneMask, int width=__HSA_WAVEFRONT_SIZE__) __HC__

unsigned int	hc::__mul24 (unsigned int x, unsigned int y)[[hc]]
	Multiply two unsigned integers (x,y) but only the lower 24 bits will be used in the multiplication. More...

int	hc::__mul24 (int x, int y)[[hc]]
	Multiply two integers (x,y) but only the lower 24 bits will be used in the multiplication. More...

unsigned int	hc::__mad24 (unsigned int x, unsigned int y, unsigned int z)[[hc]]
	Multiply two unsigned integers (x,y) but only the lower 24 bits will be used in the multiplication and then add the product to a 32-bit unsigned integer. More...

int	hc::__mad24 (int x, int y, int z)[[hc]]
	Multiply two integers (x,y) but only the lower 24 bits will be used in the multiplication and then add the product to a 32-bit integer. More...

void	hc::abort () __HC__

unsigned int	hc::get_group_segment_size () __HC__
	Fetch the size of group segment. More...

unsigned int	hc::get_static_group_segment_size () __HC__
	Fetch the size of static group segment. More...

void *	hc::get_group_segment_base_pointer () __HC__
	Fetch the address of the beginning of group segment.

void *	hc::get_dynamic_group_segment_base_pointer () __HC__
	Fetch the address of the beginning of dynamic group segment.

void	hc::all_memory_fence (const tile_barrier &) __HC__
	Establishes a thread-tile scoped memory fence for both global and tile-static memory operations. More...

void	hc::global_memory_fence (const tile_barrier &) __HC__
	Establishes a thread-tile scoped memory fence for global (but not tile-static) memory operations. More...

void	hc::tile_static_memory_fence (const tile_barrier &) __HC__
	Establishes a thread-tile scoped memory fence for tile-static (but not global) memory operations. More...

template<int N>
const extent< N > &	hc::check (const extent< N > &ext)

template<typename T , int N>
void	hc::copy (const array< T, N > &src, array< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename OutputIter , typename T , int N>
void	hc::copy (const array_view< T, N > &src, OutputIter destBegin)
	The contents of a source array are copied into "dest" starting with iterator destBegin. More...

template<typename OutputIter , typename T , int N>
void	hc::copy (const array< T, N > &src, OutputIter destBegin)
	The contents of a source array are copied into "dest" starting with iterator destBegin. More...

template<typename T , int N>
completion_future	hc::copy_async (const array< T, N > &src, array< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename T , int N>
completion_future	hc::copy_async (const array< T, N > &src, const array_view< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename OutputIter , typename T , int N>
completion_future	hc::copy_async (const array< T, N > &src, OutputIter destBegin)
	The contents of a source array are copied into "dest" starting with iterator destBegin. More...

template<typename OutputIter , typename T , int N>
completion_future	hc::copy_async (const array_view< T, N > &src, OutputIter destBegin)
	The contents of a source array are copied into "dest" starting with iterator destBegin. More...

template<typename T , int N>
completion_future	hc::copy_async (const array< T, N > &src, const array< T, N > &dest)

template<typename T , int N>
completion_future	hc::copy_async (const array_view< const T, N > &src, const array< T, N > &dest)

template<typename T , int N>
completion_future	hc::copy_async (const array_view< T, N > &src, const array< T, N > &dest)

unsigned int	hc::__atomic_wrapinc (unsigned int *address, unsigned int val) __HC__
	Atomically do the following operations: More...

unsigned int	hc::__atomic_wrapdec (unsigned int *address, unsigned int val) __HC__
	Atomically do the following operations: More...

template<int N, typename Kernel >
completion_future	hc::parallel_for_each (const accelerator_view &, const extent< N > &, const Kernel &)

template<typename Kernel >
completion_future	hc::parallel_for_each (const accelerator_view &, const tiled_extent< 3 > &, const Kernel &)

template<typename Kernel >
completion_future	hc::parallel_for_each (const accelerator_view &, const tiled_extent< 2 > &, const Kernel &)

template<typename Kernel >
completion_future	hc::parallel_for_each (const accelerator_view &, const tiled_extent< 1 > &, const Kernel &)

template<int N, typename Kernel >
completion_future	hc::parallel_for_each (const extent< N > &compute_domain, const Kernel &f)

template<typename Kernel >
completion_future	hc::parallel_for_each (const tiled_extent< 3 > &compute_domain, const Kernel &f)

template<typename Kernel >
completion_future	hc::parallel_for_each (const tiled_extent< 2 > &compute_domain, const Kernel &f)

template<typename Kernel >
completion_future	hc::parallel_for_each (const tiled_extent< 1 > &compute_domain, const Kernel &f)

template<int N, typename Kernel >
	hc::__attribute__ ((noinline, used)) completion_future parallel_for_each(const accelerator_view &av

	hc::if (av.get_accelerator().get_device_path()==L"cpu")

return	hc::completion_future (Kalmar::mcw_cxxamp_launch_kernel_async< pfe_wrapper< N, Kernel >, 3 >(av.pQueue, ext, NULL, _pf))

template<typename Kernel >
	hc::__attribute__ ((noinline, used)) completion_future parallel_for_each(const accelerator_view &av

	hc::if (compute_domain[0]< 0)

	hc::if (static_cast< size_t >(compute_domain[0]) > 4294967295L) throw invalid_compute_domain("Extent size too large.")

return	hc::completion_future (Kalmar::mcw_cxxamp_launch_kernel_async< Kernel, 1 >(av.pQueue,&ext, NULL, f))

	hc::if (static_cast< size_t >(compute_domain[1]) > 4294967295L) throw invalid_compute_domain("Extent size too large.")

return	hc::completion_future (Kalmar::mcw_cxxamp_launch_kernel_async< Kernel, 2 >(av.pQueue, ext, NULL, f))

	hc::if (static_cast< size_t >(compute_domain[2]) > 4294967295L) throw invalid_compute_domain("Extent size too large.")

return	hc::completion_future (Kalmar::mcw_cxxamp_launch_kernel_async< Kernel, 3 >(av.pQueue, ext, NULL, f))

return	hc::completion_future (Kalmar::mcw_cxxamp_execute_kernel_with_dynamic_group_memory_async< Kernel, 1 >(av.pQueue,&ext,&tile, f, kernel, compute_domain.get_dynamic_group_segment_size()))

return	hc::completion_future (Kalmar::mcw_cxxamp_execute_kernel_with_dynamic_group_memory_async< Kernel, 2 >(av.pQueue, ext, tile, f, kernel, compute_domain.get_dynamic_group_segment_size()))

return	hc::completion_future (Kalmar::mcw_cxxamp_execute_kernel_with_dynamic_group_memory_async< Kernel, 3 >(av.pQueue, ext, tile, f, kernel, compute_domain.get_dynamic_group_segment_size()))


template<int N>
extent< N >	hc::operator+ (const extent< N > &lhs, const extent< N > &rhs) __CPU__ __HC__
	Adds (or subtracts) two objects of extent<N> to form a new extent. More...

template<int N>
extent< N >	hc::operator- (const extent< N > &lhs, const extent< N > &rhs) __CPU__ __HC__
	Adds (or subtracts) two objects of extent<N> to form a new extent. More...


template<int N>
extent< N >	hc::operator+ (const extent< N > &ext, int value) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...

template<int N>
extent< N >	hc::operator+ (int value, const extent< N > &ext) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...

template<int N>
extent< N >	hc::operator- (const extent< N > &ext, int value) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...

template<int N>
extent< N >	hc::operator- (int value, const extent< N > &ext) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...

template<int N>
extent< N >	hc::operator* (const extent< N > &ext, int value) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...

template<int N>
extent< N >	hc::operator* (int value, const extent< N > &ext) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...

template<int N>
extent< N >	hc::operator/ (const extent< N > &ext, int value) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...

template<int N>
extent< N >	hc::operator/ (int value, const extent< N > &ext) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...

template<int N>
extent< N >	hc::operator% (const extent< N > &ext, int value) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...

template<int N>
extent< N >	hc::operator% (int value, const extent< N > &ext) __CPU__ __HC__
	Binary arithmetic operations that produce a new extent<N> that is the result of performing the corresponding binary arithmetic operation on the elements of the extent operands. More...


unsigned int	hc::__bitextract_u32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Extract a range of bits. More...

uint64_t	hc::__bitextract_u64 (uint64_t src0, unsigned int src1, unsigned int src2) __HC__
	Extract a range of bits. More...

int	hc::__bitextract_s32 (int src0, unsigned int src1, unsigned int src2) __HC__
	Extract a range of bits. More...

int64_t	hc::__bitextract_s64 (int64_t src0, unsigned int src1, unsigned int src2) __HC__
	Extract a range of bits. More...


unsigned int	hc::__bitinsert_u32 (unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) __HC__
	Replace a range of bits. More...

uint64_t	hc::__bitinsert_u64 (uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) __HC__
	Replace a range of bits. More...

int	hc::__bitinsert_s32 (int src0, int src1, unsigned int src2, unsigned int src3) __HC__
	Replace a range of bits. More...

int64_t	hc::__bitinsert_s64 (int64_t src0, int64_t src1, unsigned int src2, unsigned int src3) __HC__
	Replace a range of bits. More...


unsigned int	hc::__bitmask_b32 (unsigned int src0, unsigned int src1) __HC__
	Create a bit mask that can be used with bitselect. More...

uint64_t	hc::__bitmask_b64 (unsigned int src0, unsigned int src1) __HC__
	Create a bit mask that can be used with bitselect. More...


unsigned int	hc::__bitrev_b32 (unsigned int src0)[[hc]] __asm("llvm.bitreverse.i32")
	Reverse the bits. More...

uint64_t	hc::__bitrev_b64 (uint64_t src0)[[hc]] __asm("llvm.bitreverse.i64")
	Reverse the bits. More...


unsigned int	hc::__bitselect_b32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Do bit field selection. More...

uint64_t	hc::__bitselect_b64 (uint64_t src0, uint64_t src1, uint64_t src2) __HC__
	Do bit field selection. More...


unsigned int	hc::__lastbit_u32_u32 (unsigned int input) __HC__
	Find the first bit set to 1 in a number starting from the least significant bit. More...

unsigned int	hc::__lastbit_u32_u64 (unsigned long long int input) __HC__
	Find the first bit set to 1 in a number starting from the least significant bit. More...

unsigned int	hc::__lastbit_u32_s32 (int input) __HC__
	Find the first bit set to 1 in a number starting from the least significant bit. More...

unsigned int	hc::__lastbit_u32_s64 (unsigned long long input) __HC__
	Find the first bit set to 1 in a number starting from the least significant bit. More...


unsigned int	hc::__unpacklo_u8x4 (unsigned int src0, unsigned int src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...

uint64_t	hc::__unpacklo_u8x8 (uint64_t src0, uint64_t src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...

unsigned int	hc::__unpacklo_u16x2 (unsigned int src0, unsigned int src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...

uint64_t	hc::__unpacklo_u16x4 (uint64_t src0, uint64_t src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...

uint64_t	hc::__unpacklo_u32x2 (uint64_t src0, uint64_t src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...

int	hc::__unpacklo_s8x4 (int src0, int src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...

int64_t	hc::__unpacklo_s8x8 (int64_t src0, int64_t src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...

int	hc::__unpacklo_s16x2 (int src0, int src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...

int64_t	hc::__unpacklo_s16x4 (int64_t src0, int64_t src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...

int64_t	hc::__unpacklo_s32x2 (int64_t src0, int64_t src1) __HC__
	Copy and interleave the lower half of the elements from each source into the destination. More...


unsigned int	hc::__unpackhi_u8x4 (unsigned int src0, unsigned int src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...

uint64_t	hc::__unpackhi_u8x8 (uint64_t src0, uint64_t src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...

unsigned int	hc::__unpackhi_u16x2 (unsigned int src0, unsigned int src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...

uint64_t	hc::__unpackhi_u16x4 (uint64_t src0, uint64_t src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...

uint64_t	hc::__unpackhi_u32x2 (uint64_t src0, uint64_t src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...

int	hc::__unpackhi_s8x4 (int src0, int src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...

int64_t	hc::__unpackhi_s8x8 (int64_t src0, int64_t src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...

int	hc::__unpackhi_s16x2 (int src0, int src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...

int64_t	hc::__unpackhi_s16x4 (int64_t src0, int64_t src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...

int64_t	hc::__unpackhi_s32x2 (int64_t src0, int64_t src1) __HC__
	Copy and interleave the upper half of the elements from each source into the destination. More...


unsigned int	hc::__pack_u8x4_u32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

uint64_t	hc::__pack_u8x8_u32 (uint64_t src0, unsigned int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

unsigned	hc::__pack_u16x2_u32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

uint64_t	hc::__pack_u16x4_u32 (uint64_t src0, unsigned int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

uint64_t	hc::__pack_u32x2_u32 (uint64_t src0, unsigned int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

int	hc::__pack_s8x4_s32 (int src0, int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

int64_t	hc::__pack_s8x8_s32 (int64_t src0, int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

int	hc::__pack_s16x2_s32 (int src0, int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

int64_t	hc::__pack_s16x4_s32 (int64_t src0, int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

int64_t	hc::__pack_s32x2_s32 (int64_t src0, int src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...

double	hc::__pack_f32x2_f32 (double src0, float src1, unsigned int src2) __HC__
	Assign the elements of the packed value in src0, replacing the element specified by src2 with the value from src1. More...


unsigned int	hc::__unpack_u32_u8x4 (unsigned int src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

unsigned int	hc::__unpack_u32_u8x8 (uint64_t src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

unsigned int	hc::__unpack_u32_u16x2 (unsigned int src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

unsigned int	hc::__unpack_u32_u16x4 (uint64_t src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

unsigned int	hc::__unpack_u32_u32x2 (uint64_t src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

int	hc::__unpack_s32_s8x4 (int src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

int	hc::__unpack_s32_s8x8 (int64_t src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

int	hc::__unpack_s32_s16x2 (int src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

int	hc::__unpack_s32_s16x4 (int64_t src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

int	hc::__unpack_s32_s3x2 (int64_t src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...

float	hc::__unpack_f32_f32x2 (double src0, unsigned int src1) __HC__
	Assign the elements specified by src1 from the packed value in src0. More...


unsigned int	hc::__sad_u32_u32 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Computes the sum of the absolute differences of src0 and src1 and then adds src2 to the result. More...

unsigned int	hc::__sad_u32_u16x2 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Computes the sum of the absolute differences of src0 and src1 and then adds src2 to the result. More...

unsigned int	hc::__sad_u32_u8x4 (unsigned int src0, unsigned int src1, unsigned int src2) __HC__
	Computes the sum of the absolute differences of src0 and src1 and then adds src2 to the result. More...


int	hc::__amdgcn_mbcnt_lo (int mask, int src)[[hc]] __asm("llvm.amdgcn.mbcnt.lo")
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__amdgcn_mbcnt_hi (int mask, int src)[[hc]] __asm("llvm.amdgcn.mbcnt.hi")
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__lane_id (void)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__amdgcn_ds_bpermute (int index, int src)[[hc]] __asm("llvm.amdgcn.ds.bpermute")
	ds_bpermute intrinsic. FIXME: We need to add __builtin_amdgcn_ds_bpermute to clang and call it here instead.

unsigned int	hc::__amdgcn_ds_bpermute (int index, unsigned int src)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

float	hc::__amdgcn_ds_bpermute (int index, float src)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__amdgcn_ds_permute (int index, int src)[[hc]]
	ds_permute intrinsic

unsigned int	hc::__amdgcn_ds_permute (int index, unsigned int src)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

float	hc::__amdgcn_ds_permute (int index, float src)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__amdgcn_ds_swizzle (int src, int pattern)[[hc]]
	ds_swizzle intrinsic

unsigned int	hc::__amdgcn_ds_swizzle (unsigned int src, int pattern)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

float	hc::__amdgcn_ds_swizzle (float src, int pattern)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__amdgcn_move_dpp (int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl)[[hc]]
	move DPP intrinsic

int	hc::__amdgcn_wave_sr1 (int src, bool bound_ctrl)[[hc]]
	Shift the value of src to the right by one thread within a wavefront. More...

unsigned int	hc::__amdgcn_wave_sr1 (unsigned int src, bool bound_ctrl)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

float	hc::__amdgcn_wave_sr1 (float src, bool bound_ctrl)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__amdgcn_wave_sl1 (int src, bool bound_ctrl)[[hc]]
	Shift the value of src to the left by one thread within a wavefront. More...

unsigned int	hc::__amdgcn_wave_sl1 (unsigned int src, bool bound_ctrl)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

float	hc::__amdgcn_wave_sl1 (float src, bool bound_ctrl)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__amdgcn_wave_rr1 (int src)[[hc]]
	Rotate the value of src to the right by one thread within a wavefront. More...

unsigned int	hc::__amdgcn_wave_rr1 (unsigned int src)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

float	hc::__amdgcn_wave_rr1 (float src)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__amdgcn_wave_rl1 (int src)[[hc]]
	Rotate the value of src to the left by one thread within a wavefront. More...

unsigned int	hc::__amdgcn_wave_rl1 (unsigned int src)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

float	hc::__amdgcn_wave_rl1 (float src)[[hc]]
	Direct copy from indexed active work-item within a wavefront. More...

int	hc::__shfl (int var, int srcLane, int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Direct copy from indexed active work-item within a wavefront. More...

unsigned int	hc::__shfl (unsigned int var, int srcLane, int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Direct copy from indexed active work-item within a wavefront. More...

float	hc::__shfl (float var, int srcLane, int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Direct copy from indexed active work-item within a wavefront. More...


int	hc::__shfl_up (int var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Copy from an active work-item with lower ID relative to caller within a wavefront. More...

unsigned int	hc::__shfl_up (unsigned int var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Copy from an active work-item with lower ID relative to caller within a wavefront. More...

float	hc::__shfl_up (float var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Copy from an active work-item with lower ID relative to caller within a wavefront. More...


int	hc::__shfl_down (int var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Copy from an active work-item with higher ID relative to caller within a wavefront. More...

unsigned int	hc::__shfl_down (unsigned int var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Copy from an active work-item with higher ID relative to caller within a wavefront. More...

float	hc::__shfl_down (float var, const unsigned int delta, const int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Copy from an active work-item with higher ID relative to caller within a wavefront. More...


int	hc::__shfl_xor (int var, int laneMask, int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Copy from an active work-item based on bitwise XOR of caller work-item ID within a wavefront. More...

float	hc::__shfl_xor (float var, int laneMask, int width=__HSA_WAVEFRONT_SIZE__) __HC__
	Copy from an active work-item based on bitwise XOR of caller work-item ID within a wavefront. More...


template<typename T , int N>
void	hc::copy (const array_view< const T, N > &src, const array_view< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename T , int N>
void	hc::copy (const array_view< T, N > &src, const array_view< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename T >
void	hc::copy (const array_view< const T, 1 > &src, const array_view< T, 1 > &dest)
	The contents of "src" are copied into "dest". More...


template<typename T , int N>
void	hc::copy (const array< T, N > &src, const array_view< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename T >
void	hc::copy (const array< T, 1 > &src, const array_view< T, 1 > &dest)
	The contents of "src" are copied into "dest". More...


template<typename T , int N>
void	hc::copy (const array_view< const T, N > &src, array< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename T , int N>
void	hc::copy (const array_view< T, N > &src, array< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename T >
void	hc::copy (const array_view< const T, 1 > &src, array< T, 1 > &dest)
	The contents of "src" are copied into "dest". More...


template<typename InputIter , typename T , int N>
void	hc::copy (InputIter srcBegin, InputIter srcEnd, const array_view< T, N > &dest)
	The contents of a source container from the iterator range [srcBegin,srcEnd) are copied into "dest". More...

template<typename InputIter , typename T , int N>
void	hc::copy (InputIter srcBegin, const array_view< T, N > &dest)
	The contents of a source container, read starting at the iterator srcBegin, are copied into "dest". More...


template<typename InputIter , typename T , int N>
void	hc::copy (InputIter srcBegin, InputIter srcEnd, array< T, N > &dest)
	The contents of a source container from the iterator range [srcBegin,srcEnd) are copied into "dest". More...

template<typename InputIter , typename T , int N>
void	hc::copy (InputIter srcBegin, array< T, N > &dest)
	The contents of a source container, read starting at the iterator srcBegin, are copied into "dest". More...


template<typename T , int N>
completion_future	hc::copy_async (const array_view< const T, N > &src, array< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename T , int N>
completion_future	hc::copy_async (const array_view< T, N > &src, array< T, N > &dest)
	The contents of "src" are copied into "dest". More...


template<typename T , int N>
completion_future	hc::copy_async (const array_view< const T, N > &src, const array_view< T, N > &dest)
	The contents of "src" are copied into "dest". More...

template<typename T , int N>
completion_future	hc::copy_async (const array_view< T, N > &src, const array_view< T, N > &dest)
	The contents of "src" are copied into "dest". More...


template<typename InputIter , typename T , int N>
completion_future	hc::copy_async (InputIter srcBegin, InputIter srcEnd, array< T, N > &dest)
	The contents of a source container from the iterator range [srcBegin,srcEnd) are copied into "dest". More...

template<typename InputIter , typename T , int N>
completion_future	hc::copy_async (InputIter srcBegin, array< T, N > &dest)
	The contents of a source container, read starting at the iterator srcBegin, are copied into "dest". More...


template<typename InputIter , typename T , int N>
completion_future	hc::copy_async (InputIter srcBegin, InputIter srcEnd, const array_view< T, N > &dest)
	The contents of a source container from the iterator range [srcBegin,srcEnd) are copied into "dest". More...

template<typename InputIter , typename T , int N>
completion_future	hc::copy_async (InputIter srcBegin, const array_view< T, N > &dest)
	The contents of a source container, read starting at the iterator srcBegin, are copied into "dest". More...


unsigned int	hc::atomic_exchange (unsigned int *dest, unsigned int val) __CPU__ __HC__
	Atomically read the value stored in dest, replace it with the value given in val, and return the old value to the caller. More...

int	hc::atomic_exchange (int *dest, int val) __CPU__ __HC__
	Atomically read the value stored in dest, replace it with the value given in val, and return the old value to the caller. More...

float	hc::atomic_exchange (float *dest, float val) __CPU__ __HC__
	Atomically read the value stored in dest, replace it with the value given in val, and return the old value to the caller. More...

uint64_t	hc::atomic_exchange (uint64_t *dest, uint64_t val) __CPU__ __HC__
	Atomically read the value stored in dest, replace it with the value given in val, and return the old value to the caller. More...


bool	hc::atomic_compare_exchange (unsigned int *dest, unsigned int *expected_val, unsigned int val) __CPU__ __HC__
	These functions attempt to perform these three steps atomically: More...

bool	hc::atomic_compare_exchange (int *dest, int *expected_val, int val) __CPU__ __HC__
	These functions attempt to perform these three steps atomically: More...

bool	hc::atomic_compare_exchange (uint64_t *dest, uint64_t *expected_val, uint64_t val) __CPU__ __HC__
	These functions attempt to perform these three steps atomically: More...


unsigned	hc::atomic_fetch_add (unsigned *x, unsigned y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

int	hc::atomic_fetch_add (int *x, int y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

float	hc::atomic_fetch_add (float *x, float y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

uint64_t	hc::atomic_fetch_add (uint64_t *x, uint64_t y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

unsigned	hc::atomic_fetch_sub (unsigned *x, unsigned y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

int	hc::atomic_fetch_sub (int *x, int y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

float	hc::atomic_fetch_sub (float *x, float y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

unsigned	hc::atomic_fetch_and (unsigned *x, unsigned y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

int	hc::atomic_fetch_and (int *x, int y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

uint64_t	hc::atomic_fetch_and (uint64_t *x, uint64_t y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

unsigned	hc::atomic_fetch_or (unsigned *x, unsigned y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

int	hc::atomic_fetch_or (int *x, int y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

uint64_t	hc::atomic_fetch_or (uint64_t *x, uint64_t y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

unsigned	hc::atomic_fetch_xor (unsigned *x, unsigned y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

int	hc::atomic_fetch_xor (int *x, int y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

uint64_t	hc::atomic_fetch_xor (uint64_t *x, uint64_t y) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

int	hc::atomic_fetch_max (int *dest, int val) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

unsigned int	hc::atomic_fetch_max (unsigned int *dest, unsigned int val) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

uint64_t	hc::atomic_fetch_max (uint64_t *dest, uint64_t val) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

int	hc::atomic_fetch_min (int *dest, int val) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

unsigned int	hc::atomic_fetch_min (unsigned int *dest, unsigned int val) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...

uint64_t	hc::atomic_fetch_min (uint64_t *dest, uint64_t val) __CPU__ __HC__
	Atomically read the value stored in dest, apply the binary numerical operation specific to the function with the read value and val serving as input operands, and store the result back to the location pointed by dest. More...


int	hc::atomic_fetch_inc (int *_Dest) __CPU__ __HC__
	Atomically increment or decrement the value stored at the location pointed to by dest. More...

unsigned int	hc::atomic_fetch_inc (unsigned int *_Dest) __CPU__ __HC__
	Atomically increment or decrement the value stored at the location pointed to by dest. More...

int	hc::atomic_fetch_dec (int *_Dest) __CPU__ __HC__
	Atomically increment or decrement the value stored at the location pointed to by dest. More...

unsigned int	hc::atomic_fetch_dec (unsigned int *_Dest) __CPU__ __HC__
	Atomically increment or decrement the value stored at the location pointed to by dest. More...

Variables
const extent< N > &	hc::compute_domain

const extent< N > const Kernel &f __CPU__	hc::__HC__

const pfe_wrapper< N, Kernel >	hc::_pf (compute_domain, f)

size_t	hc::ext = compute_domain[0]

size_t	hc::tile = compute_domain.tile_dim[0]

void *	hc::kernel = Kalmar::mcw_cxxamp_get_kernel<Kernel>(av.pQueue, f)

Detailed Description

Heterogeneous C++ (HC) API.

Macro Definition Documentation

#define __HSA_WAVEFRONT_SIZE__ (64)

Fetch the size of a wavefront.

Returns: The size of a wavefront.

Referenced by hc::__shfl_xor().

Classes

Namespaces

Macros

Typedefs

Functions

Variables

Detailed Description

Macro Definition Documentation