AI Engine-ML v2 Intrinsics User Guide  v2025.1
Loading...
Searching...
No Matches
Concatenate vectors

Vector concat intrinsic functions concatenate two or four vector values into a single larger vector. More...

Topics

 Concatenate four vectors
 
 Concatenate two vectors
 

Functions

v4cacc64 concat (v2cacc64 a, v2cacc64 b)
 
v8cacc64 concat (v2cacc64 a, v2cacc64 b, v2cacc64 c, v2cacc64 d)
 
v8acc64 concat (v4acc64 a, v4acc64 b)
 
v16acc64 concat (v4acc64 a, v4acc64 b, v4acc64 c, v4acc64 d)
 
v8caccfloat concat (v4caccfloat a, v4caccfloat b)
 
v16caccfloat concat (v4caccfloat a, v4caccfloat b, v4caccfloat c, v4caccfloat d)
 
v16acc32 concat (v8acc32 a, v8acc32 b)
 
v32acc32 concat (v8acc32 a, v8acc32 b, v8acc32 c, v8acc32 d)
 
v16accfloat concat (v8accfloat a, v8accfloat b)
 
v32accfloat concat (v8accfloat a, v8accfloat b, v8accfloat c, v8accfloat d)
 

Concat v64int4 vectors

v128int4 concat (v64int4 a0, v64int4 a1)
 
v256int4 concat (v64int4 a0, v64int4 a1, v64int4 a2, v64int4 a3)
 

Concat v64uint4 vectors

v128uint4 concat (v64uint4 a0, v64uint4 a1)
 
v256uint4 concat (v64uint4 a0, v64uint4 a1, v64uint4 a2, v64uint4 a3)
 

Concat v32int8 vectors

v64int8 concat (v32int8 a0, v32int8 a1)
 
v128int8 concat (v32int8 a0, v32int8 a1, v32int8 a2, v32int8 a3)
 

Concat v32uint8 vectors

v64uint8 concat (v32uint8 a0, v32uint8 a1)
 
v128uint8 concat (v32uint8 a0, v32uint8 a1, v32uint8 a2, v32uint8 a3)
 

Concat v8cint16 vectors

v16cint16 concat (v8cint16 a0, v8cint16 a1)
 
v32cint16 concat (v8cint16 a0, v8cint16 a1, v8cint16 a2, v8cint16 a3)
 

Concat v16int16 vectors

v32int16 concat (v16int16 a0, v16int16 a1)
 
v64int16 concat (v16int16 a0, v16int16 a1, v16int16 a2, v16int16 a3)
 

Concat v16uint16 vectors

v32uint16 concat (v16uint16 a0, v16uint16 a1)
 
v64uint16 concat (v16uint16 a0, v16uint16 a1, v16uint16 a2, v16uint16 a3)
 

Concat v4cint32 vectors

v8cint32 concat (v4cint32 a0, v4cint32 a1)
 
v16cint32 concat (v4cint32 a0, v4cint32 a1, v4cint32 a2, v4cint32 a3)
 

Concat v8int32 vectors

v16int32 concat (v8int32 a0, v8int32 a1)
 
v32int32 concat (v8int32 a0, v8int32 a1, v8int32 a2, v8int32 a3)
 

Concat v8uint32 vectors

v16uint32 concat (v8uint32 a0, v8uint32 a1)
 
v32uint32 concat (v8uint32 a0, v8uint32 a1, v8uint32 a2, v8uint32 a3)
 

Concat v16bfloat16 vectors

v32bfloat16 concat (v16bfloat16 a0, v16bfloat16 a1)
 
v64bfloat16 concat (v16bfloat16 a0, v16bfloat16 a1, v16bfloat16 a2, v16bfloat16 a3)
 

Concat v16float16 vectors

v32float16 concat (v16float16 a0, v16float16 a1)
 
v64float16 concat (v16float16 a0, v16float16 a1, v16float16 a2, v16float16 a3)
 

Concat v32bfloat8 vectors

v64bfloat8 concat (v32bfloat8 a0, v32bfloat8 a1)
 
v128bfloat8 concat (v32bfloat8 a0, v32bfloat8 a1, v32bfloat8 a2, v32bfloat8 a3)
 

Concat v32float8 vectors

v64float8 concat (v32float8 a0, v32float8 a1)
 
v128float8 concat (v32float8 a0, v32float8 a1, v32float8 a2, v32float8 a3)
 

Concat v8float vectors

v16float concat (v8float a0, v8float a1)
 
v32float concat (v8float a0, v8float a1, v8float a2, v8float a3)
 

Concat v8cbfloat16 vectors

v16cbfloat16 concat (v8cbfloat16 a0, v8cbfloat16 a1)
 
v32cbfloat16 concat (v8cbfloat16 a0, v8cbfloat16 a1, v8cbfloat16 a2, v8cbfloat16 a3)
 

Concat v4cfloat vectors

v8cfloat concat (v4cfloat a0, v4cfloat a1)
 
v16cfloat concat (v4cfloat a0, v4cfloat a1, v4cfloat a2, v4cfloat a3)
 

Concat v128int4 vectors

v256int4 concat (v128int4 a0, v128int4 a1)
 

Concat v128uint4 vectors

v256uint4 concat (v128uint4 a0, v128uint4 a1)
 

Concat v64int8 vectors

v128int8 concat (v64int8 a0, v64int8 a1)
 

Concat v64uint8 vectors

v128uint8 concat (v64uint8 a0, v64uint8 a1)
 

Concat v16cint16 vectors

v32cint16 concat (v16cint16 a0, v16cint16 a1)
 

Concat v32int16 vectors

v64int16 concat (v32int16 a0, v32int16 a1)
 

Concat v32uint16 vectors

v64uint16 concat (v32uint16 a0, v32uint16 a1)
 

Concat v8cint32 vectors

v16cint32 concat (v8cint32 a0, v8cint32 a1)
 

Concat v16int32 vectors

v32int32 concat (v16int32 a0, v16int32 a1)
 

Concat v16uint32 vectors

v32uint32 concat (v16uint32 a0, v16uint32 a1)
 

Concat v16accfloat vectors

v32accfloat concat (v16accfloat a0, v16accfloat a1)
 
v64accfloat concat (v16accfloat a0, v16accfloat a1, v16accfloat a2, v16accfloat a3)
 

Concat v8caccfloat vectors

v16caccfloat concat (v8caccfloat a0, v8caccfloat a1)
 
v32caccfloat concat (v8caccfloat a0, v8caccfloat a1, v8caccfloat a2, v8caccfloat a3)
 

Concat v16acc32 vectors

v32acc32 concat (v16acc32 a0, v16acc32 a1)
 
v64acc32 concat (v16acc32 a0, v16acc32 a1, v16acc32 a2, v16acc32 a3)
 

Concat v8acc64 vectors

v16acc64 concat (v8acc64 a0, v8acc64 a1)
 
v32acc64 concat (v8acc64 a0, v8acc64 a1, v8acc64 a2, v8acc64 a3)
 

Concat v4cacc64 vectors

v8cacc64 concat (v4cacc64 a0, v4cacc64 a1)
 
v16cacc64 concat (v4cacc64 a0, v4cacc64 a1, v4cacc64 a2, v4cacc64 a3)
 

Concat v32bfloat16 vectors

v64bfloat16 concat (v32bfloat16 a0, v32bfloat16 a1)
 

Concat v32float16 vectors

v64float16 concat (v32float16 a0, v32float16 a1)
 

Concat v64bfloat8 vectors

v128bfloat8 concat (v64bfloat8 a0, v64bfloat8 a1)
 

Concat v64float8 vectors

v128float8 concat (v64float8 a0, v64float8 a1)
 

Concat v16float vectors

v32float concat (v16float a0, v16float a1)
 

Concat v16cbfloat16 vectors

v32cbfloat16 concat (v16cbfloat16 a0, v16cbfloat16 a1)
 

Concat v8cfloat vectors

v16cfloat concat (v8cfloat a0, v8cfloat a1)
 

Concat v32accfloat vectors

v64accfloat concat (v32accfloat a0, v32accfloat a1)
 

Concat v16caccfloat vectors

v32caccfloat concat (v16caccfloat a0, v16caccfloat a1)
 

Concat v32acc32 vectors

v64acc32 concat (v32acc32 a0, v32acc32 a1)
 

Concat v16acc64 vectors

v32acc64 concat (v16acc64 a0, v16acc64 a1)
 

Concat v8cacc64 vectors

v16cacc64 concat (v8cacc64 a0, v8cacc64 a1)
 

Detailed Description

Vector concat intrinsic functions concatenate two or four vector values into a single larger vector.

In the descriptions below, the buffer sizes are denoted as follows: W = 256 bits, X = 512 bits, Y = 1024 bits.

For more information see Integer Vector Types.

Note
All intrinsics require a compile-time constant for the idx parameter, except those of the following form:
  • upd_w(Y buf,int idx,W val)

upd_hi and upd_lo intrinsic functions

Update the top half or bottom half of the lanes within a data type.

upd_w({X,Y} buf,int idx,W val) 256-bit intrinsic functions

upd_w(buf,0...3,val) updates successive 256-bit lanes of a 512/1024-bit vector.

The following example shows the update of a large 32-way complex vector 16 elements at-a-time using a 256-bit update. These updates are also pipelined.

const v16int16 * input = d_in;
...
sbuff = upd_w(sbuff,0, *input++); // 00++|08++|____|____ ____|____|____|____
sbuff = upd_w(sbuff,1, *input++); // 00..|08..|16++|24++ ____|____|____|____
Definition me_chess.h:534
Definition me_chess.h:510
v64int16 undef_v64int16()

wset, xset, yset

These behave like the update intrinsics, but they set the value in a new buffer instead of modifying one that already exists. For example:

wset_w(idx,val) is the same as upd_w(undef_type(),idx,val)

concat, upd

To concatenate two vectors into a new vector twice the size, you can use either the concat intrinsic or the corresponding upd intrinsics.

Both options are valid and should generally give the same performance.

Function Documentation

◆ concat() [1/76]

v256int4 concat ( v128int4 a0,
v128int4 a1 )

◆ concat() [2/76]

v256uint4 concat ( v128uint4 a0,
v128uint4 a1 )

◆ concat() [3/76]

v32acc32 concat ( v16acc32 a0,
v16acc32 a1 )

◆ concat() [4/76]

v64acc32 concat ( v16acc32 a0,
v16acc32 a1,
v16acc32 a2,
v16acc32 a3 )

◆ concat() [5/76]

v32acc64 concat ( v16acc64 a0,
v16acc64 a1 )

◆ concat() [6/76]

v32accfloat concat ( v16accfloat a0,
v16accfloat a1 )

◆ concat() [7/76]

v64accfloat concat ( v16accfloat a0,
v16accfloat a1,
v16accfloat a2,
v16accfloat a3 )

◆ concat() [8/76]

v32bfloat16 concat ( v16bfloat16 a0,
v16bfloat16 a1 )

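◆ concat() [9/76]

v64bfloat16 concat ( v16bfloat16 a0,
v16bfloat16 a1,
v16bfloat16 a2,
v16bfloat16 a3 )

◆ concat() [10/76]

v32caccfloat concat ( v16caccfloat a0,
v16caccfloat a1 )

◆ concat() [11/76]

v32cbfloat16 concat ( v16cbfloat16 a0,
v16cbfloat16 a1 )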

◆ concat() [12/76]

v32cint16 concat ( v16cint16 a0,
v16cint16 a1 )

◆ concat() [13/76]

v32float concat ( v16float a0,
v16float a1 )

◆ concat() [14/76]

v32float16 concat ( v16float16 a0,
v16float16 a1 )

◆ concat() [15/76]

v64float16 concat ( v16float16 a0,
v16float16 a1,
v16float16 a2,
v16float16 a3 )

◆ concat() [16/76]

v32int16 concat ( v16int16 a0,
v16int16 a1 )

◆ concat() [17/76]

v64int16 concat ( v16int16 a0,
v16int16 a1,
v16int16 a2,
v16int16 a3 )

◆ concat() [18/76]

v32int32 concat ( v16int32 a0,
v16int32 a1 )

◆ concat() [19/76]

v32uint16 concat ( v16uint16 a0,
v16uint16 a1 )

◆ concat() [20/76]

v64uint16 concat ( v16uint16 a0,
v16uint16 a1,
v16uint16 a2,
v16uint16 a3 )

◆ concat() [21/76]

v32uint32 concat ( v16uint32 a0,
v16uint32 a1 )

◆ concat() [22/76]

v4cacc64 concat ( v2cacc64 a,
v2cacc64 b )

◆ concat() [23/76]

v8cacc64 concat ( v2cacc64 a,
v2cacc64 b,
v2cacc64 c,
v2cacc64 d )

◆ concat() [24/76]

v64acc32 concat ( v32acc32 a0,
v32acc32 a1 )

◆ concat() [25/76]

v64accfloat concat ( v32accfloat a0,
v32accfloat a1 )

◆ concat() [26/76]

v64bfloat16 concat ( v32bfloat16 a0,
v32bfloat16 a1 )

◆ concat() [27/76]

v64bfloat8 concat ( v32bfloat8 a0,
v32bfloat8 a1 )

◆ concat() [28/76]

v128bfloat8 concat ( v32bfloat8 a0,
v32bfloat8 a1,
v32bfloat8 a2,
v32bfloat8 a3 )

◆ concat() [29/76]

v64float16 concat ( v32float16 a0,
v32float16 a1 )

◆ concat() [30/76]

v64float8 concat ( v32float8 a0,
v32float8 a1 )

◆ concat() [31/76]

v128float8 concat ( v32float8 a0,
v32float8 a1,
v32float8 a2,
v32float8 a3 )

◆ concat() [32/76]

v64int16 concat ( v32int16 a0,
v32int16 a1 )

◆ concat() [33/76]

v64int8 concat ( v32int8 a0,
v32int8 a1 )

◆ concat() [34/76]

v128int8 concat ( v32int8 a0,
v32int8 a1,
v32int8 a2,
v32int8 a3 )

◆ concat() [35/76]

v64uint16 concat ( v32uint16 a0,
v32uint16 a1 )

◆ concat() [36/76]

v64uint8 concat ( v32uint8 a0,
v32uint8 a1 )

◆ concat() [37/76]

v128uint8 concat ( v32uint8 a0,
v32uint8 a1,
v32uint8 a2,
v32uint8 a3 )

◆ concat() [38/76]

v8acc64 concat ( v4acc64 a,
v4acc64 b )

◆ concat() [39/76]

v16acc64 concat ( v4acc64 a,
v4acc64 b,
v4acc64 c,
v4acc64 d )

◆ concat() [40/76]

v8cacc64 concat ( v4cacc64 a0,
v4cacc64 a1 )

◆ concat() [41/76]

v16cacc64 concat ( v4cacc64 a0,
v4cacc64 a1,
v4cacc64 a2,
v4cacc64 a3 )

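◆ concat() [42/76]

v8caccfloat concat ( v4caccfloat a,
v4caccfloat b )

◆ concat() [43/76]

v16caccfloat concat ( v4caccfloat a,
v4caccfloat b,
v4caccfloat c,
v4caccfloat d )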

◆ concat() [44/76]

v8cfloat concat ( v4cfloat a0,
v4cfloat a1 )

◆ concat() [45/76]

v16cfloat concat ( v4cfloat a0,
v4cfloat a1,
v4cfloat a2,
v4cfloat a3 )

◆ concat() [46/76]

v8cint32 concat ( v4cint32 a0,
v4cint32 a1 )

◆ concat() [47/76]

v16cint32 concat ( v4cint32 a0,
v4cint32 a1,
v4cint32 a2,
v4cint32 a3 )

◆ concat() [48/76]

v128bfloat8 concat ( v64bfloat8 a0,
v64bfloat8 a1 )

◆ concat() [49/76]

v128float8 concat ( v64float8 a0,
v64float8 a1 )

◆ concat() [50/76]

v128int4 concat ( v64int4 a0,
v64int4 a1 )

◆ concat() [51/76]

v256int4 concat ( v64int4 a0,
v64int4 a1,
v64int4 a2,
v64int4 a3 )

◆ concat() [52/76]

v128int8 concat ( v64int8 a0,
v64int8 a1 )

◆ concat() [53/76]

v128uint4 concat ( v64uint4 a0,
v64uint4 a1 )

◆ concat() [54/76]

v256uint4 concat ( v64uint4 a0,
v64uint4 a1,
v64uint4 a2,
v64uint4 a3 )

◆ concat() [55/76]

v128uint8 concat ( v64uint8 a0,
v64uint8 a1 )

◆ concat() [56/76]

v16acc32 concat ( v8acc32 a,
v8acc32 b )

◆ concat() [57/76]

v32acc32 concat ( v8acc32 a,
v8acc32 b,
v8acc32 c,
v8acc32 d )

◆ concat() [58/76]

v16acc64 concat ( v8acc64 a0,
v8acc64 a1 )

◆ concat() [59/76]

v32acc64 concat ( v8acc64 a0,
v8acc64 a1,
v8acc64 a2,
v8acc64 a3 )

◆ concat() [60/76]

v16accfloat concat ( v8accfloat a,
v8accfloat b )

◆ concat() [61/76]

v32accfloat concat ( v8accfloat a,
v8accfloat b,
v8accfloat c,
v8accfloat d )

◆ concat() [62/76]

v16cacc64 concat ( v8cacc64 a0,
v8cacc64 a1 )

◆ concat() [63/76]

v16caccfloat concat ( v8caccfloat a0,
v8caccfloat a1 )

◆ concat() [64/76]

v32caccfloat concat ( v8caccfloat a0,
v8caccfloat a1,
v8caccfloat a2,
v8caccfloat a3 )

◆ concat() [65/76]

v16cbfloat16 concat ( v8cbfloat16 a0,
v8cbfloat16 a1 )

◆ concat() [66/76]

v32cbfloat16 concat ( v8cbfloat16 a0,
v8cbfloat16 a1,
v8cbfloat16 a2,
v8cbfloat16 a3 )

◆ concat() [67/76]

v16cfloat concat ( v8cfloat a0,
v8cfloat a1 )

◆ concat() [68/76]

v16cint16 concat ( v8cint16 a0,
v8cint16 a1 )

◆ concat() [69/76]

v32cint16 concat ( v8cint16 a0,
v8cint16 a1,
v8cint16 a2,
v8cint16 a3 )

◆ concat() [70/76]

v16cint32 concat ( v8cint32 a0,
v8cint32 a1 )

◆ concat() [71/76]

v16float concat ( v8float a0,
v8float a1 )

◆ concat() [72/76]

v32float concat ( v8float a0,
v8float a1,
v8float a2,
v8float a3 )

◆ concat() [73/76]

v16int32 concat ( v8int32 a0,
v8int32 a1 )

◆ concat() [74/76]

v32int32 concat ( v8int32 a0,
v8int32 a1,
v8int32 a2,
v8int32 a3 )

◆ concat() [75/76]

v16uint32 concat ( v8uint32 a0,
v8uint32 a1 )

◆ concat() [76/76]

v32uint32 concat ( v8uint32 a0,
v8uint32 a1,
v8uint32 a2,
v8uint32 a3 )