AI Engine  (AIE) r2p15.2
 All Data Structures Functions Variables Groups Pages
Cyclic Addressing

Overview

Cyclic addressing intrinsics allow indexing into circular buffers.

Macros

#define cyclic_add_unaligned_load(P, OFFS, START, LEN)   (circ_add_load_ptr ((P), (mod_t)((OFFS)*sizeof(v8int16)), (START), (mod_t)((LEN)*sizeof(v8int16)), (P)))
 
#define cyclic_add_unaligned_store(P, OFFS, START, LEN, D)   (circ_add_store_ptr((P), (D), (mod_t)((OFFS)*sizeof(v8int16)), (START), (mod_t)((LEN)*sizeof(v8int16)), (P)))
 
#define PROM(T)
 

Functions

promotion v32int8 circ_add_load_ptr (const v32int8 *, mod_t, const v32int8 *, mod_t, const v32int8 *&)
 
promotion v32uint8 circ_add_load_ptr (const v32uint8 *, mod_t, const v32uint8 *, mod_t, const v32uint8 *&)
 
promotion v16int16 circ_add_load_ptr (const v16int16 *, mod_t, const v16int16 *, mod_t, const v16int16 *&)
 
promotion v8cint16 circ_add_load_ptr (const v8cint16 *, mod_t, const v8cint16 *, mod_t, const v8cint16 *&)
 
promotion v8int32 circ_add_load_ptr (const v8int32 *, mod_t, const v8int32 *, mod_t, const v8int32 *&)
 
promotion v4cint32 circ_add_load_ptr (const v4cint32 *, mod_t, const v4cint32 *, mod_t, const v4cint32 *&)
 
promotion v4int64 circ_add_load_ptr (const v4int64 *, mod_t, const v4int64 *, mod_t, const v4int64 *&)
 
promotion v2cint64 circ_add_load_ptr (const v2cint64 *, mod_t, const v2cint64 *, mod_t, const v2cint64 *&)
 
promotion v2int128 circ_add_load_ptr (const v2int128 *, mod_t, const v2int128 *, mod_t, const v2int128 *&)
 
promotion v8float circ_add_load_ptr (const v8float *, mod_t, const v8float *, mod_t, const v8float *&)
 
promotion v4cfloat circ_add_load_ptr (const v4cfloat *, mod_t, const v4cfloat *, mod_t, const v4cfloat *&)
 
promotion void circ_add_store_ptr (const v32int8 *, v32int8, mod_t, const v32int8 *, mod_t, const v32int8 *&)
 
promotion void circ_add_store_ptr (const v32uint8 *, v32uint8, mod_t, const v32uint8 *, mod_t, const v32uint8 *&)
 
promotion void circ_add_store_ptr (const v16int16 *, v16int16, mod_t, const v16int16 *, mod_t, const v16int16 *&)
 
promotion void circ_add_store_ptr (const v8cint16 *, v8cint16, mod_t, const v8cint16 *, mod_t, const v8cint16 *&)
 
promotion void circ_add_store_ptr (const v8int32 *, v8int32, mod_t, const v8int32 *, mod_t, const v8int32 *&)
 
promotion void circ_add_store_ptr (const v4cint32 *, v4cint32, mod_t, const v4cint32 *, mod_t, const v4cint32 *&)
 
promotion void circ_add_store_ptr (const v4int64 *, v4int64, mod_t, const v4int64 *, mod_t, const v4int64 *&)
 
promotion void circ_add_store_ptr (const v2cint64 *, v2cint64, mod_t, const v2cint64 *, mod_t, const v2cint64 *&)
 
promotion void circ_add_store_ptr (const v2int128 *, v2int128, mod_t, const v2int128 *, mod_t, const v2int128 *&)
 
promotion void circ_add_store_ptr (const v8float *, v8float, mod_t, const v8float *, mod_t, const v8float *&)
 
promotion void circ_add_store_ptr (const v4cfloat *, v4cfloat, mod_t, const v4cfloat *, mod_t, const v4cfloat *&)
 
void * cyclic_add (void *a, int offs, void *restrict start, int len)
 

Macro Definition Documentation

#define cyclic_add_unaligned_load (   P,
  OFFS,
  START,
  LEN 
)    (circ_add_load_ptr ((P), (mod_t)((OFFS)*sizeof(v8int16)), (START), (mod_t)((LEN)*sizeof(v8int16)), (P)))

Load a vector from a pointer and perform a cyclic increment or decrement within a buffer

Please use this intrinsic if your pointer is unaligned with respect to the vector size.

Parameters
P — Current circular buffer pointer position
OFFS — Number of (half) elements to increment the pointer by (each step is half the size of the input type)
START — Pointer to the start of the circular buffer (must be aligned to the vector size)
LEN — Size of the circular buffer (double the number of elements of the datatype of the pointer)
#define cyclic_add_unaligned_store (   P,
  OFFS,
  START,
  LEN,
  D 
)    (circ_add_store_ptr((P), (D), (mod_t)((OFFS)*sizeof(v8int16)), (START), (mod_t)((LEN)*sizeof(v8int16)), (P)))

Store a vector to a pointer and perform a cyclic increment or decrement within a buffer

Please use this intrinsic if your pointer is unaligned with respect to the vector size.

Parameters
P — Current circular buffer pointer position
OFFS — Number of (half) elements to increment the pointer by (each step is half the size of the input type)
START — Pointer to the start of the circular buffer (must be aligned to the vector size)
LEN — Size of the circular buffer (double the number of elements of the datatype of the pointer)
D — Vector value to store at the current pointer position
#define PROM (   T)
Value:
promotion T circ_add_load_ptr (const T*, mod_t,const T*,mod_t, const T*&) = v16w16 circ_add_load (addr, amod,addr,amod, addr&); \
promotion void circ_add_store_ptr(const T*,T,mod_t,const T*,mod_t, const T*&) = void circ_add_store(addr,v16w16,amod,addr,amod, addr&);

Function Documentation

promotion v32int8 circ_add_load_ptr ( const v32int8 *,
mod_t  ,
const v32int8 *,
mod_t  ,
const v32int8 *&   
)
promotion v32uint8 circ_add_load_ptr ( const v32uint8 *,
mod_t  ,
const v32uint8 *,
mod_t  ,
const v32uint8 *&   
)
promotion v16int16 circ_add_load_ptr ( const v16int16 *,
mod_t  ,
const v16int16 *,
mod_t  ,
const v16int16 *&   
)
promotion v8cint16 circ_add_load_ptr ( const v8cint16 *,
mod_t  ,
const v8cint16 *,
mod_t  ,
const v8cint16 *&   
)
promotion v8int32 circ_add_load_ptr ( const v8int32 *,
mod_t  ,
const v8int32 *,
mod_t  ,
const v8int32 *&   
)
promotion v4cint32 circ_add_load_ptr ( const v4cint32 *,
mod_t  ,
const v4cint32 *,
mod_t  ,
const v4cint32 *&   
)
promotion v4int64 circ_add_load_ptr ( const v4int64 *,
mod_t  ,
const v4int64 *,
mod_t  ,
const v4int64 *&   
)
promotion v2cint64 circ_add_load_ptr ( const v2cint64 *,
mod_t  ,
const v2cint64 *,
mod_t  ,
const v2cint64 *&   
)
promotion v2int128 circ_add_load_ptr ( const v2int128 *,
mod_t  ,
const v2int128 *,
mod_t  ,
const v2int128 *&   
)
promotion v8float circ_add_load_ptr ( const v8float *,
mod_t  ,
const v8float *,
mod_t  ,
const v8float *&   
)
promotion v4cfloat circ_add_load_ptr ( const v4cfloat *,
mod_t  ,
const v4cfloat *,
mod_t  ,
const v4cfloat *&   
)
promotion void circ_add_store_ptr ( const v32int8 *,
v32int8  ,
mod_t  ,
const v32int8 *,
mod_t  ,
const v32int8 *&   
)
promotion void circ_add_store_ptr ( const v32uint8 *,
v32uint8  ,
mod_t  ,
const v32uint8 *,
mod_t  ,
const v32uint8 *&   
)
promotion void circ_add_store_ptr ( const v16int16 *,
v16int16  ,
mod_t  ,
const v16int16 *,
mod_t  ,
const v16int16 *&   
)
promotion void circ_add_store_ptr ( const v8cint16 *,
v8cint16  ,
mod_t  ,
const v8cint16 *,
mod_t  ,
const v8cint16 *&   
)
promotion void circ_add_store_ptr ( const v8int32 *,
v8int32  ,
mod_t  ,
const v8int32 *,
mod_t  ,
const v8int32 *&   
)
promotion void circ_add_store_ptr ( const v4cint32 *,
v4cint32  ,
mod_t  ,
const v4cint32 *,
mod_t  ,
const v4cint32 *&   
)
promotion void circ_add_store_ptr ( const v4int64 *,
v4int64  ,
mod_t  ,
const v4int64 *,
mod_t  ,
const v4int64 *&   
)
promotion void circ_add_store_ptr ( const v2cint64 *,
v2cint64  ,
mod_t  ,
const v2cint64 *,
mod_t  ,
const v2cint64 *&   
)
promotion void circ_add_store_ptr ( const v2int128 *,
v2int128  ,
mod_t  ,
const v2int128 *,
mod_t  ,
const v2int128 *&   
)
promotion void circ_add_store_ptr ( const v8float *,
v8float  ,
mod_t  ,
const v8float *,
mod_t  ,
const v8float *&   
)
promotion void circ_add_store_ptr ( const v4cfloat *,
v4cfloat  ,
mod_t  ,
const v4cfloat *,
mod_t  ,
const v4cfloat *&   
)
void* cyclic_add ( void *  a,
int  offs,
void *restrict  start,
int  len 
)

Cyclic increment or decrement of a pointer within a buffer

Parameters
a — Current circular buffer pointer position
offs — Number of elements to increment the pointer by (each step is the size of the input type)
start — Pointer to the start of the circular buffer (must be aligned to the vector size)
len — Size of the circular buffer (number of elements of the datatype of the pointer)
Note
The AIE hardware has a load+cyclic_add instruction which loads from a pointer and post-modifies it in the same cycle. In case the vector size is 256-bit and the pointer is only 128-bit aligned and it points right before the end of the buffer, this instruction will load the first 128-bit from the end of the buffer and the second 128-bit from the beginning of the buffer.
If the load and the cyclic_add are executed in separate instructions, the above behavior is not achieved. Hence, in typical source code, the user writes the load and the cyclic_add on the pointer as separate intrinsics and relies on the compiler to combine the two if possible. That means the unaligned behavior is not guaranteed in this case. In x86 compilation, the two intrinsics will always be executed sequentially and the unaligned load across buffer boundaries will never work.
Therefore, if you want to load unaligned across cyclic buffer boundaries, please use the cyclic_add_unaligned_load() intrinsic.