AI Engine-ML v2 Intrinsics User Guide  v2025.1
Loading...
Searching...
No Matches
Insert vector

Vector insert intrinsic functions allow substitution of the lanes within a vector value. More...

Topics

 Insert MX vector components
 
 Insert a vector into a vector
 
 Insert sparse vector components
 

Insert 256-bit in 512-bit register

v128int4 insert (v128int4 a, int idx, v64int4 b)
 
v128uint4 insert (v128uint4 a, int idx, v64uint4 b)
 
v64int8 insert (v64int8 a, int idx, v32int8 b)
 
v64uint8 insert (v64uint8 a, int idx, v32uint8 b)
 
v16cint16 insert (v16cint16 a, int idx, v8cint16 b)
 
v32int16 insert (v32int16 a, int idx, v16int16 b)
 
v32uint16 insert (v32uint16 a, int idx, v16uint16 b)
 
v8cint32 insert (v8cint32 a, int idx, v4cint32 b)
 
v16int32 insert (v16int32 a, int idx, v8int32 b)
 
v16uint32 insert (v16uint32 a, int idx, v8uint32 b)
 
v32bfloat16 insert (v32bfloat16 a, int idx, v16bfloat16 b)
 
v32float16 insert (v32float16 a, int idx, v16float16 b)
 
v64bfloat8 insert (v64bfloat8 a, int idx, v32bfloat8 b)
 
v64float8 insert (v64float8 a, int idx, v32float8 b)
 
v16float insert (v16float a, int idx, v8float b)
 
v16cbfloat16 insert (v16cbfloat16 a, int idx, v8cbfloat16 b)
 
v8cfloat insert (v8cfloat a, int idx, v4cfloat b)
 
v16accfloat insert (v16accfloat a, int idx, v8accfloat b)
 
v8caccfloat insert (v8caccfloat a, int idx, v4caccfloat b)
 
v16acc32 insert (v16acc32 a, int idx, v8acc32 b)
 
v8acc64 insert (v8acc64 a, int idx, v4acc64 b)
 
v4cacc64 insert (v4cacc64 a, int idx, v2cacc64 b)
 

Insert 256-bit in 1024-bit register

v256int4 insert (v256int4 a, int idx, v64int4 b)
 
v256uint4 insert (v256uint4 a, int idx, v64uint4 b)
 
v128int8 insert (v128int8 a, int idx, v32int8 b)
 
v128uint8 insert (v128uint8 a, int idx, v32uint8 b)
 
v32cint16 insert (v32cint16 a, int idx, v8cint16 b)
 
v64int16 insert (v64int16 a, int idx, v16int16 b)
 
v64uint16 insert (v64uint16 a, int idx, v16uint16 b)
 
v16cint32 insert (v16cint32 a, int idx, v4cint32 b)
 
v32int32 insert (v32int32 a, int idx, v8int32 b)
 
v32uint32 insert (v32uint32 a, int idx, v8uint32 b)
 
v64bfloat16 insert (v64bfloat16 a, int idx, v16bfloat16 b)
 
v64float16 insert (v64float16 a, int idx, v16float16 b)
 
v128bfloat8 insert (v128bfloat8 a, int idx, v32bfloat8 b)
 
v128float8 insert (v128float8 a, int idx, v32float8 b)
 
v32float insert (v32float a, int idx, v8float b)
 
v32cbfloat16 insert (v32cbfloat16 a, int idx, v8cbfloat16 b)
 
v16cfloat insert (v16cfloat a, int idx, v4cfloat b)
 
v32accfloat insert (v32accfloat a, int idx, v8accfloat b)
 
v16caccfloat insert (v16caccfloat a, int idx, v4caccfloat b)
 
v32acc32 insert (v32acc32 a, int idx, v8acc32 b)
 
v16acc64 insert (v16acc64 a, int idx, v4acc64 b)
 
v8cacc64 insert (v8cacc64 a, int idx, v2cacc64 b)
 

Insert 512-bit in 1024-bit register

v256int4 insert (v256int4 a, int idx, v128int4 b)
 
v256uint4 insert (v256uint4 a, int idx, v128uint4 b)
 
v128int8 insert (v128int8 a, int idx, v64int8 b)
 
v128uint8 insert (v128uint8 a, int idx, v64uint8 b)
 
v32cint16 insert (v32cint16 a, int idx, v16cint16 b)
 
v64int16 insert (v64int16 a, int idx, v32int16 b)
 
v64uint16 insert (v64uint16 a, int idx, v32uint16 b)
 
v16cint32 insert (v16cint32 a, int idx, v8cint32 b)
 
v32int32 insert (v32int32 a, int idx, v16int32 b)
 
v32uint32 insert (v32uint32 a, int idx, v16uint32 b)
 
v32accfloat insert (v32accfloat a, int idx, v16accfloat b)
 
v16caccfloat insert (v16caccfloat a, int idx, v8caccfloat b)
 
v32acc32 insert (v32acc32 a, int idx, v16acc32 b)
 
v16acc64 insert (v16acc64 a, int idx, v8acc64 b)
 
v8cacc64 insert (v8cacc64 a, int idx, v4cacc64 b)
 
v64bfloat16 insert (v64bfloat16 a, int idx, v32bfloat16 b)
 
v64float16 insert (v64float16 a, int idx, v32float16 b)
 
v128bfloat8 insert (v128bfloat8 a, int idx, v64bfloat8 b)
 
v128float8 insert (v128float8 a, int idx, v64float8 b)
 
v32float insert (v32float a, int idx, v16float b)
 
v32cbfloat16 insert (v32cbfloat16 a, int idx, v16cbfloat16 b)
 
v16cfloat insert (v16cfloat a, int idx, v8cfloat b)
 

Insert 512-bit in 2048-bit register

v64accfloat insert (v64accfloat a, int idx, v16accfloat b)
 
v32caccfloat insert (v32caccfloat a, int idx, v8caccfloat b)
 
v64acc32 insert (v64acc32 a, int idx, v16acc32 b)
 
v32acc64 insert (v32acc64 a, int idx, v8acc64 b)
 
v16cacc64 insert (v16cacc64 a, int idx, v4cacc64 b)
 

Insert 1024-bit in 2048-bit register

v64accfloat insert (v64accfloat a, int idx, v32accfloat b)
 
v32caccfloat insert (v32caccfloat a, int idx, v16caccfloat b)
 
v64acc32 insert (v64acc32 a, int idx, v32acc32 b)
 
v32acc64 insert (v32acc64 a, int idx, v16acc64 b)
 
v16cacc64 insert (v16cacc64 a, int idx, v8cacc64 b)
 

Insert v128mx9 into v256mx9

v256mx9 insert (v256mx9 m, int idx, v128mx9 a)
 

Insert v64mx9 into v256mx9

v256mx9 insert (v256mx9 m, int idx, v64mx9 a)
 

Detailed Description

Vector insert intrinsic functions allow substitution of the lanes within a vector value.

In the descriptions below, the buffer sizes are denoted as follows: W - 256-bit, X - 512-bit, Y - 1024-bit.

For more information see Integer Vector Types.

Note
All intrinsics require a compile-time constant for the idx parameter, except those of the following form:
  • upd_w(Y buf,int idx,W val)

upd_hi and upd_lo intrinsic functions

Update the top half or bottom half of the lanes within a data type.

upd_w({X,Y} buf,int idx,W val) 256-bit intrinsic functions

upd_w(buf,0...3,val) updates successive 256-bit lanes of a 512/1024-bit vector.

The following example shows the update of a large 32-way complex vector, 16 elements at a time, using a 256-bit update. These updates are also pipelined.

const v16int16 * input = d_in;
...
sbuff = upd_w(sbuff,0, *input_++); // 00++|08++|____|____ ____|____|____|____
sbuff = upd_w(sbuff,1, *input_++); // 00..|08..|16++|24++ ____|____|____|____
Definition me_chess.h:534
Definition me_chess.h:510
v64int16 undef_v64int16()

concat, upd

To concatenate two vectors into a new vector twice the size, you can use either the concat intrinsic or the corresponding upd intrinsics.

Both options are valid and should generally give the same performance.

Function Documentation

◆ insert() [1/78]

v128bfloat8 insert ( v128bfloat8 a,
int idx,
v32bfloat8 b )

◆ insert() [2/78]

v128bfloat8 insert ( v128bfloat8 a,
int idx,
v64bfloat8 b )

◆ insert() [3/78]

v128float8 insert ( v128float8 a,
int idx,
v32float8 b )

◆ insert() [4/78]

v128float8 insert ( v128float8 a,
int idx,
v64float8 b )

◆ insert() [5/78]

v128int4 insert ( v128int4 a,
int idx,
v64int4 b )

◆ insert() [6/78]

v128int8 insert ( v128int8 a,
int idx,
v32int8 b )

◆ insert() [7/78]

v128int8 insert ( v128int8 a,
int idx,
v64int8 b )

◆ insert() [8/78]

v128uint4 insert ( v128uint4 a,
int idx,
v64uint4 b )

◆ insert() [9/78]

v128uint8 insert ( v128uint8 a,
int idx,
v32uint8 b )

◆ insert() [10/78]

v128uint8 insert ( v128uint8 a,
int idx,
v64uint8 b )

◆ insert() [11/78]

v16acc32 insert ( v16acc32 a,
int idx,
v8acc32 b )

◆ insert() [12/78]

v16acc64 insert ( v16acc64 a,
int idx,
v4acc64 b )

◆ insert() [13/78]

v16acc64 insert ( v16acc64 a,
int idx,
v8acc64 b )

◆ insert() [14/78]

v16accfloat insert ( v16accfloat a,
int idx,
v8accfloat b )

◆ insert() [15/78]

v16cacc64 insert ( v16cacc64 a,
int idx,
v4cacc64 b )

◆ insert() [16/78]

v16cacc64 insert ( v16cacc64 a,
int idx,
v8cacc64 b )

◆ insert() [17/78]

v16caccfloat insert ( v16caccfloat a,
int idx,
v4caccfloat b )

◆ insert() [18/78]

v16caccfloat insert ( v16caccfloat a,
int idx,
v8caccfloat b )

◆ insert() [19/78]

v16cbfloat16 insert ( v16cbfloat16 a,
int idx,
v8cbfloat16 b )

◆ insert() [20/78]

v16cfloat insert ( v16cfloat a,
int idx,
v4cfloat b )

◆ insert() [21/78]

v16cfloat insert ( v16cfloat a,
int idx,
v8cfloat b )

◆ insert() [22/78]

v16cint16 insert ( v16cint16 a,
int idx,
v8cint16 b )

◆ insert() [23/78]

v16cint32 insert ( v16cint32 a,
int idx,
v4cint32 b )

◆ insert() [24/78]

v16cint32 insert ( v16cint32 a,
int idx,
v8cint32 b )

◆ insert() [25/78]

v16float insert ( v16float a,
int idx,
v8float b )

◆ insert() [26/78]

v16int32 insert ( v16int32 a,
int idx,
v8int32 b )

◆ insert() [27/78]

v16uint32 insert ( v16uint32 a,
int idx,
v8uint32 b )

◆ insert() [28/78]

v256int4 insert ( v256int4 a,
int idx,
v128int4 b )

◆ insert() [29/78]

v256int4 insert ( v256int4 a,
int idx,
v64int4 b )

◆ insert() [30/78]

v256mx9 insert ( v256mx9 m,
int idx,
v128mx9 a )

◆ insert() [31/78]

v256mx9 insert ( v256mx9 m,
int idx,
v64mx9 a )

◆ insert() [32/78]

v256uint4 insert ( v256uint4 a,
int idx,
v128uint4 b )

◆ insert() [33/78]

v256uint4 insert ( v256uint4 a,
int idx,
v64uint4 b )

◆ insert() [34/78]

v32acc32 insert ( v32acc32 a,
int idx,
v16acc32 b )

◆ insert() [35/78]

v32acc32 insert ( v32acc32 a,
int idx,
v8acc32 b )

◆ insert() [36/78]

v32acc64 insert ( v32acc64 a,
int idx,
v16acc64 b )

◆ insert() [37/78]

v32acc64 insert ( v32acc64 a,
int idx,
v8acc64 b )

◆ insert() [38/78]

v32accfloat insert ( v32accfloat a,
int idx,
v16accfloat b )

◆ insert() [39/78]

v32accfloat insert ( v32accfloat a,
int idx,
v8accfloat b )

◆ insert() [40/78]

v32bfloat16 insert ( v32bfloat16 a,
int idx,
v16bfloat16 b )

◆ insert() [41/78]

v32caccfloat insert ( v32caccfloat a,
int idx,
v16caccfloat b )

◆ insert() [42/78]

v32caccfloat insert ( v32caccfloat a,
int idx,
v8caccfloat b )

◆ insert() [43/78]

v32cbfloat16 insert ( v32cbfloat16 a,
int idx,
v16cbfloat16 b )

◆ insert() [44/78]

v32cbfloat16 insert ( v32cbfloat16 a,
int idx,
v8cbfloat16 b )

◆ insert() [45/78]

v32cint16 insert ( v32cint16 a,
int idx,
v16cint16 b )

◆ insert() [46/78]

v32cint16 insert ( v32cint16 a,
int idx,
v8cint16 b )

◆ insert() [47/78]

v32float insert ( v32float a,
int idx,
v16float b )

◆ insert() [48/78]

v32float insert ( v32float a,
int idx,
v8float b )

◆ insert() [49/78]

v32float16 insert ( v32float16 a,
int idx,
v16float16 b )

◆ insert() [50/78]

v32int16 insert ( v32int16 a,
int idx,
v16int16 b )

◆ insert() [51/78]

v32int32 insert ( v32int32 a,
int idx,
v16int32 b )

◆ insert() [52/78]

v32int32 insert ( v32int32 a,
int idx,
v8int32 b )

◆ insert() [53/78]

v32uint16 insert ( v32uint16 a,
int idx,
v16uint16 b )

◆ insert() [54/78]

v32uint32 insert ( v32uint32 a,
int idx,
v16uint32 b )

◆ insert() [55/78]

v32uint32 insert ( v32uint32 a,
int idx,
v8uint32 b )

◆ insert() [56/78]

v4cacc64 insert ( v4cacc64 a,
int idx,
v2cacc64 b )

◆ insert() [57/78]

v64acc32 insert ( v64acc32 a,
int idx,
v16acc32 b )

◆ insert() [58/78]

v64acc32 insert ( v64acc32 a,
int idx,
v32acc32 b )

◆ insert() [59/78]

v64accfloat insert ( v64accfloat a,
int idx,
v16accfloat b )

◆ insert() [60/78]

v64accfloat insert ( v64accfloat a,
int idx,
v32accfloat b )

◆ insert() [61/78]

v64bfloat16 insert ( v64bfloat16 a,
int idx,
v16bfloat16 b )

◆ insert() [62/78]

v64bfloat16 insert ( v64bfloat16 a,
int idx,
v32bfloat16 b )

◆ insert() [63/78]

v64bfloat8 insert ( v64bfloat8 a,
int idx,
v32bfloat8 b )

◆ insert() [64/78]

v64float16 insert ( v64float16 a,
int idx,
v16float16 b )

◆ insert() [65/78]

v64float16 insert ( v64float16 a,
int idx,
v32float16 b )

◆ insert() [66/78]

v64float8 insert ( v64float8 a,
int idx,
v32float8 b )

◆ insert() [67/78]

v64int16 insert ( v64int16 a,
int idx,
v16int16 b )

◆ insert() [68/78]

v64int16 insert ( v64int16 a,
int idx,
v32int16 b )

◆ insert() [69/78]

v64int8 insert ( v64int8 a,
int idx,
v32int8 b )

◆ insert() [70/78]

v64uint16 insert ( v64uint16 a,
int idx,
v16uint16 b )

◆ insert() [71/78]

v64uint16 insert ( v64uint16 a,
int idx,
v32uint16 b )

◆ insert() [72/78]

v64uint8 insert ( v64uint8 a,
int idx,
v32uint8 b )

◆ insert() [73/78]

v8acc64 insert ( v8acc64 a,
int idx,
v4acc64 b )

◆ insert() [74/78]

v8cacc64 insert ( v8cacc64 a,
int idx,
v2cacc64 b )

◆ insert() [75/78]

v8cacc64 insert ( v8cacc64 a,
int idx,
v4cacc64 b )

◆ insert() [76/78]

v8caccfloat insert ( v8caccfloat a,
int idx,
v4caccfloat b )

◆ insert() [77/78]

v8cfloat insert ( v8cfloat a,
int idx,
v4cfloat b )

◆ insert() [78/78]

v8cint32 insert ( v8cint32 a,
int idx,
v4cint32 b )