AI Engine-ML v2 Intrinsics User Guide  v2025.1
Loading...
Searching...
No Matches

Intrinsics allowing you to perform vector shuffles. More...

Topics

 Illustration of Shuffle Modes
 

Shuffle two vectors

Shuffle two vectors of the same datatype according to a given mode.

Parameters
a: First vector to shuffle
b: Second vector to shuffle
mode: Shuffle mode (of type eInterleaveMode)
Returns
Shuffled vector
v16acc32 shuffle (v16acc32 a, v16acc32 b, unsigned int mode)
 
v16int32 shuffle (v16int32 a, v16int32 b, unsigned int mode)
 
v32int16 shuffle (v32int16 a, v32int16 b, unsigned int mode)
 
v64int8 shuffle (v64int8 a, v64int8 b, unsigned int mode)
 
v128int4 shuffle (v128int4 a, v128int4 b, unsigned int mode)
 
v16uint32 shuffle (v16uint32 a, v16uint32 b, unsigned int mode)
 
v32uint16 shuffle (v32uint16 a, v32uint16 b, unsigned int mode)
 
v64uint8 shuffle (v64uint8 a, v64uint8 b, unsigned int mode)
 
v128uint4 shuffle (v128uint4 a, v128uint4 b, unsigned int mode)
 
v16cbfloat16 shuffle (v16cbfloat16 a, v16cbfloat16 b, unsigned int mode)
 
v8cint32 shuffle (v8cint32 a, v8cint32 b, unsigned int mode)
 
v8cfloat shuffle (v8cfloat a, v8cfloat b, unsigned int mode)
 
v16cint16 shuffle (v16cint16 a, v16cint16 b, unsigned int mode)
 
v32bfloat16 shuffle (v32bfloat16 a, v32bfloat16 b, unsigned int mode)
 
v32float16 shuffle (v32float16 a, v32float16 b, unsigned int mode)
 
v64float8 shuffle (v64float8 a, v64float8 b, unsigned int mode)
 
v64bfloat8 shuffle (v64bfloat8 a, v64bfloat8 b, unsigned int mode)
 
v16float shuffle (v16float a, v16float b, unsigned int mode)
 

Interleave one vector

Shuffle one vector given a shuffle mode

Parameters
a: Vector to shuffle
mode: Shuffle mode (of type eInterleaveMode)
Returns
Shuffled vector
v16int32 shuffle (v16int32 a, unsigned int mode)
 
v32int16 shuffle (v32int16 a, unsigned int mode)
 
v64int8 shuffle (v64int8 a, unsigned int mode)
 
v128int4 shuffle (v128int4 a, unsigned int mode)
 
v16uint32 shuffle (v16uint32 a, unsigned int mode)
 
v32uint16 shuffle (v32uint16 a, unsigned int mode)
 
v64uint8 shuffle (v64uint8 a, unsigned int mode)
 
v128uint4 shuffle (v128uint4 a, unsigned int mode)
 
v16cbfloat16 shuffle (v16cbfloat16 a, unsigned int mode)
 
v8cint32 shuffle (v8cint32 a, unsigned int mode)
 
v8cfloat shuffle (v8cfloat a, unsigned int mode)
 
v16cint16 shuffle (v16cint16 a, unsigned int mode)
 
v32bfloat16 shuffle (v32bfloat16 a, unsigned int mode)
 
v32float16 shuffle (v32float16 a, unsigned int mode)
 
v64float8 shuffle (v64float8 a, unsigned int mode)
 
v64bfloat8 shuffle (v64bfloat8 a, unsigned int mode)
 
v16float shuffle (v16float a, unsigned int mode)
 

Update all elements with the same value and shuffle

Broadcasts input value to all vector lanes

Parameters
b: Value to be broadcast
mode: Shuffle mode (of type eInterleaveMode)
v64int8 shuffle_s8 (int b, unsigned int mode)
 
v32int16 shuffle_s16 (int b, unsigned int mode)
 
v16int32 shuffle_s32 (int b, unsigned int mode)
 
v16int32 shuffle_s64 (mask64 b, unsigned int mode)
 
v16int32 shuffle_v2s32 (v2int32 b, unsigned int mode)
 
v64uint8 shuffle_u8 (unsigned int b, unsigned int mode)
 
v32uint16 shuffle_u16 (unsigned int b, unsigned int mode)
 
v16uint32 shuffle_u32 (unsigned int b, unsigned int mode)
 
v16uint32 shuffle_u64 (mask64 b, unsigned int mode)
 
v16uint32 shuffle_v2u32 (v2uint32 b, unsigned int mode)
 
v16cint16 shuffle_c16 (cint16 b, unsigned int mode)
 
v16cbfloat16 shuffle_cbf16 (v1cbfloat16 b, unsigned int mode)
 
v16cbfloat16 shuffle_cbf16 (cbfloat16 b, unsigned int mode)
 
v8cint32 shuffle_c32 (cint32_w64 b, unsigned int mode)
 
v8cint32 shuffle_c32 (cint32 b, unsigned int mode)
 
v8cfloat shuffle_c32 (cfloat_w64 b, unsigned int mode)
 
v8cfloat shuffle_c32 (cfloat b, unsigned int mode)
 
v32bfloat16 shuffle_bfloat16 (bfloat16 b, unsigned int mode)
 
v32float16 shuffle_float16 (float16 b, unsigned int mode)
 
v64float8 shuffle_float8 (float8 b, unsigned int mode)
 
v64bfloat8 shuffle_bfloat8 (bfloat8 b, unsigned int mode)
 
v16float shuffle_float (float b, unsigned int mode)
 

Interleave a sparse vector

Interleave a sparse vector

Parameters
qx / qy: Sparse vector to interleave
itlv: Interleaving mode (of type eInterleaveMode)
Returns
Interleaved vector
v512int4_sparse shuffle (v512int4_sparse qy, int itlv)
 
v256int8_sparse shuffle (v256int8_sparse qy, int itlv)
 
v128int16_sparse shuffle (v128int16_sparse qy, int itlv)
 
v256int4_sparse shuffle (v256int4_sparse qx, int itlv)
 
v128int8_sparse shuffle (v128int8_sparse qx, int itlv)
 
v64int16_sparse shuffle (v64int16_sparse qx, int itlv)
 
v512uint4_sparse shuffle (v512uint4_sparse qy, int itlv)
 
v256uint8_sparse shuffle (v256uint8_sparse qy, int itlv)
 
v128uint16_sparse shuffle (v128uint16_sparse qy, int itlv)
 
v256uint4_sparse shuffle (v256uint4_sparse qx, int itlv)
 
v128uint8_sparse shuffle (v128uint8_sparse qx, int itlv)
 
v64uint16_sparse shuffle (v64uint16_sparse qx, int itlv)
 

Interleave mx (block floating-point) vectors

Interleave one vector

Parameters
a: Vector to interleave
mode: Interleaving mode (first byte controls mantissa interleaving, second byte controls exponent interleaving)
Returns
Interleaved vector
v64mx9 shuffle (v64mx9 a, v64mx9 b, unsigned mode)
 
v128mx6 shuffle (v128mx6 a, v128mx6 b, unsigned mode)
 
v128mx4 shuffle (v128mx4 a, v128mx4 b, unsigned mode)
 
v64mx9 shuffle (v64mx9 a, unsigned mode)
 
v128mx6 shuffle (v128mx6 a, unsigned mode)
 
v128mx4 shuffle (v128mx4 a, unsigned mode)
 

Detailed Description

Intrinsics allowing you to perform vector shuffles.

This table gives the different shuffle modes. For an illustration see Illustration of Shuffle Modes.

Columns: Element Size | Matrix Transpose Dimensions | Mode Value, Forward Operation (Deinterleaving): extract low, extract high | Mode Value, Backward Operation (Interleaving): extract low, extract high
8-bit 64x2 → 2x64 shuffle_T8_64x2_lo shuffle_T8_64x2_hi shuffle_T8_2x64_lo shuffle_T8_2x64_hi
16-bit 32x2 → 2x32 shuffle_T16_32x2_lo shuffle_T16_32x2_hi shuffle_T16_2x32_lo shuffle_T16_2x32_hi
32-bit 16x2 → 2x16 shuffle_T32_16x2_lo shuffle_T32_16x2_hi shuffle_T32_2x16_lo shuffle_T32_2x16_hi
64-bit 8x2 → 2x8 shuffle_T64_8x2_lo shuffle_T64_8x2_hi shuffle_T64_2x8_lo shuffle_T64_2x8_hi
128-bit 4x2 → 2x4 shuffle_T128_4x2_lo shuffle_T128_4x2_hi shuffle_T128_2x4_lo shuffle_T128_2x4_hi
256-bit 2x2 → 2x2 shuffle_T256_2x2_lo shuffle_T256_2x2_hi shuffle_T256_2x2_lo shuffle_T256_2x2_hi
512-bit 1x2 → 2x1 shuffle_T512_1x2_lo shuffle_T512_1x2_hi shuffle_T512_1x2_lo shuffle_T512_1x2_hi
16-bit 16x4 → 4x16 shuffle_T16_16x4_lo shuffle_T16_16x4_hi shuffle_T16_4x16_lo shuffle_T16_4x16_hi
16-bit 8x4 → 4x8 shuffle_T16_8x4 - shuffle_T16_4x8 -
32-bit 8x4 → 4x8 shuffle_T32_8x4_lo shuffle_T32_8x4_hi shuffle_T32_4x8_lo shuffle_T32_4x8_hi
32-bit 4x4 → 4x4 shuffle_T32_4x4 - shuffle_T32_4x4 -
64-bit 4x4 → 4x4 shuffle_T64_4x4_lo shuffle_T64_4x4_hi shuffle_T64_4x4_lo shuffle_T64_4x4_hi
8-bit 8x8 → 8x8 shuffle_T8_8x8 - shuffle_T8_8x8 -
8-bit 16x4 → 4x16 shuffle_T8_16x4 - shuffle_T8_4x16 -
8-bit 16x[0,1,2,3] → 16x[3,2,1,0] shuffle_T8_1x4_flip - shuffle_T8_1x4_flip -
16-bit 16x[0,1] → 16x[1,0] shuffle_T16_1x2_flip - shuffle_T16_1x2_flip -
32-bit 1x[0..15] → 1x[15..0] shuffle_T32_1x16_flip - shuffle_T32_1x16_flip -
16-bit 4x4 → 4x4 shuffle_T16_4x4 - shuffle_T16_4x4 -
16-bit 4x2 → 2x4 shuffle_T16_4x2 - shuffle_T16_2x4 -
16-bit 8x2 → 2x8 shuffle_T16_8x2 - shuffle_T16_2x8 -
16-bit 16x2 → 2x16 shuffle_T16_16x2 - shuffle_T16_2x16 -
8-bit 8x4 → 4x8 shuffle_T8_8x4 - shuffle_T8_4x8 -
32-bit 8x2 → 2x8 shuffle_T32_8x2 - shuffle_T32_2x8 -
64-bit 4x2 → 2x4 shuffle_T64_4x2 - shuffle_T64_2x4 -
16-bit 8x8 → 8x8 shuffle_T16_8x8 - shuffle_T16_8x8 -

Function Documentation

◆ shuffle() [1/53]

v128int16_sparse shuffle ( v128int16_sparse qy,
int itlv )

◆ shuffle() [2/53]

v128int4 shuffle ( v128int4 a,
unsigned int mode )

◆ shuffle() [3/53]

v128int4 shuffle ( v128int4 a,
v128int4 b,
unsigned int mode )

◆ shuffle() [4/53]

v128int8_sparse shuffle ( v128int8_sparse qx,
int itlv )

◆ shuffle() [5/53]

v128mx4 shuffle ( v128mx4 a,
unsigned mode )

◆ shuffle() [6/53]

v128mx4 shuffle ( v128mx4 a,
v128mx4 b,
unsigned mode )

◆ shuffle() [7/53]

v128mx6 shuffle ( v128mx6 a,
unsigned mode )

◆ shuffle() [8/53]

v128mx6 shuffle ( v128mx6 a,
v128mx6 b,
unsigned mode )

◆ shuffle() [9/53]

v128uint16_sparse shuffle ( v128uint16_sparse qy,
int itlv )

◆ shuffle() [10/53]

v128uint4 shuffle ( v128uint4 a,
unsigned int mode )

◆ shuffle() [11/53]

v128uint4 shuffle ( v128uint4 a,
v128uint4 b,
unsigned int mode )

◆ shuffle() [12/53]

v128uint8_sparse shuffle ( v128uint8_sparse qx,
int itlv )

◆ shuffle() [13/53]

v16acc32 shuffle ( v16acc32 a,
v16acc32 b,
unsigned int mode )

◆ shuffle() [14/53]

v16cbfloat16 shuffle ( v16cbfloat16 a,
unsigned int mode )

◆ shuffle() [15/53]

v16cbfloat16 shuffle ( v16cbfloat16 a,
v16cbfloat16 b,
unsigned int mode )

◆ shuffle() [16/53]

v16cint16 shuffle ( v16cint16 a,
unsigned int mode )

◆ shuffle() [17/53]

v16cint16 shuffle ( v16cint16 a,
v16cint16 b,
unsigned int mode )

◆ shuffle() [18/53]

v16float shuffle ( v16float a,
unsigned int mode )

◆ shuffle() [19/53]

v16float shuffle ( v16float a,
v16float b,
unsigned int mode )

◆ shuffle() [20/53]

v16int32 shuffle ( v16int32 a,
unsigned int mode )

◆ shuffle() [21/53]

v16int32 shuffle ( v16int32 a,
v16int32 b,
unsigned int mode )

◆ shuffle() [22/53]

v16uint32 shuffle ( v16uint32 a,
unsigned int mode )

◆ shuffle() [23/53]

v16uint32 shuffle ( v16uint32 a,
v16uint32 b,
unsigned int mode )

◆ shuffle() [24/53]

v256int4_sparse shuffle ( v256int4_sparse qx,
int itlv )

◆ shuffle() [25/53]

v256int8_sparse shuffle ( v256int8_sparse qy,
int itlv )

◆ shuffle() [26/53]

v256uint4_sparse shuffle ( v256uint4_sparse qx,
int itlv )

◆ shuffle() [27/53]

v256uint8_sparse shuffle ( v256uint8_sparse qy,
int itlv )

◆ shuffle() [28/53]

v32bfloat16 shuffle ( v32bfloat16 a,
unsigned int mode )

◆ shuffle() [29/53]

v32bfloat16 shuffle ( v32bfloat16 a,
v32bfloat16 b,
unsigned int mode )

◆ shuffle() [30/53]

v32float16 shuffle ( v32float16 a,
unsigned int mode )

◆ shuffle() [31/53]

v32float16 shuffle ( v32float16 a,
v32float16 b,
unsigned int mode )

◆ shuffle() [32/53]

v32int16 shuffle ( v32int16 a,
unsigned int mode )

◆ shuffle() [33/53]

v32int16 shuffle ( v32int16 a,
v32int16 b,
unsigned int mode )

◆ shuffle() [34/53]

v32uint16 shuffle ( v32uint16 a,
unsigned int mode )

◆ shuffle() [35/53]

v32uint16 shuffle ( v32uint16 a,
v32uint16 b,
unsigned int mode )

◆ shuffle() [36/53]

v512int4_sparse shuffle ( v512int4_sparse qy,
int itlv )

◆ shuffle() [37/53]

v512uint4_sparse shuffle ( v512uint4_sparse qy,
int itlv )

◆ shuffle() [38/53]

v64bfloat8 shuffle ( v64bfloat8 a,
unsigned int mode )

◆ shuffle() [39/53]

v64bfloat8 shuffle ( v64bfloat8 a,
v64bfloat8 b,
unsigned int mode )

◆ shuffle() [40/53]

v64float8 shuffle ( v64float8 a,
unsigned int mode )

◆ shuffle() [41/53]

v64float8 shuffle ( v64float8 a,
v64float8 b,
unsigned int mode )

◆ shuffle() [42/53]

v64int16_sparse shuffle ( v64int16_sparse qx,
int itlv )

◆ shuffle() [43/53]

v64int8 shuffle ( v64int8 a,
unsigned int mode )

◆ shuffle() [44/53]

v64int8 shuffle ( v64int8 a,
v64int8 b,
unsigned int mode )

◆ shuffle() [45/53]

v64mx9 shuffle ( v64mx9 a,
unsigned mode )

◆ shuffle() [46/53]

v64mx9 shuffle ( v64mx9 a,
v64mx9 b,
unsigned mode )

◆ shuffle() [47/53]

v64uint16_sparse shuffle ( v64uint16_sparse qx,
int itlv )

◆ shuffle() [48/53]

v64uint8 shuffle ( v64uint8 a,
unsigned int mode )

◆ shuffle() [49/53]

v64uint8 shuffle ( v64uint8 a,
v64uint8 b,
unsigned int mode )

◆ shuffle() [50/53]

v8cfloat shuffle ( v8cfloat a,
unsigned int mode )

◆ shuffle() [51/53]

v8cfloat shuffle ( v8cfloat a,
v8cfloat b,
unsigned int mode )

◆ shuffle() [52/53]

v8cint32 shuffle ( v8cint32 a,
unsigned int mode )

◆ shuffle() [53/53]

v8cint32 shuffle ( v8cint32 a,
v8cint32 b,
unsigned int mode )

◆ shuffle_bfloat16()

v32bfloat16 shuffle_bfloat16 ( bfloat16 b,
unsigned int mode )

◆ shuffle_bfloat8()

v64bfloat8 shuffle_bfloat8 ( bfloat8 b,
unsigned int mode )

◆ shuffle_c16()

v16cint16 shuffle_c16 ( cint16 b,
unsigned int mode )

◆ shuffle_c32() [1/4]

v8cfloat shuffle_c32 ( cfloat b,
unsigned int mode )

◆ shuffle_c32() [2/4]

v8cfloat shuffle_c32 ( cfloat_w64 b,
unsigned int mode )

◆ shuffle_c32() [3/4]

v8cint32 shuffle_c32 ( cint32 b,
unsigned int mode )

◆ shuffle_c32() [4/4]

v8cint32 shuffle_c32 ( cint32_w64 b,
unsigned int mode )

◆ shuffle_cbf16() [1/2]

v16cbfloat16 shuffle_cbf16 ( cbfloat16 b,
unsigned int mode )

◆ shuffle_cbf16() [2/2]

v16cbfloat16 shuffle_cbf16 ( v1cbfloat16 b,
unsigned int mode )

◆ shuffle_float()

v16float shuffle_float ( float b,
unsigned int mode )

◆ shuffle_float16()

v32float16 shuffle_float16 ( float16 b,
unsigned int mode )

◆ shuffle_float8()

v64float8 shuffle_float8 ( float8 b,
unsigned int mode )

◆ shuffle_s16()

v32int16 shuffle_s16 ( int b,
unsigned int mode )

◆ shuffle_s32()

v16int32 shuffle_s32 ( int b,
unsigned int mode )

◆ shuffle_s64()

v16int32 shuffle_s64 ( mask64 b,
unsigned int mode )

◆ shuffle_s8()

v64int8 shuffle_s8 ( int b,
unsigned int mode )

◆ shuffle_u16()

v32uint16 shuffle_u16 ( unsigned int b,
unsigned int mode )

◆ shuffle_u32()

v16uint32 shuffle_u32 ( unsigned int b,
unsigned int mode )

◆ shuffle_u64()

v16uint32 shuffle_u64 ( mask64 b,
unsigned int mode )

◆ shuffle_u8()

v64uint8 shuffle_u8 ( unsigned int b,
unsigned int mode )

◆ shuffle_v2s32()

v16int32 shuffle_v2s32 ( v2int32 b,
unsigned int mode )

◆ shuffle_v2u32()

v16uint32 shuffle_v2u32 ( v2uint32 b,
unsigned int mode )