AI Engine-ML v2 Intrinsics User Guide  v2025.1
Loading...
Searching...
No Matches
Emulated Multiply-accumulate of 32-bit Floating-Point

Elementwise matrix multiplications emulated on top of bfloat16. More...

Emulated vector float operations

v32accfloat mul_elem_32 (v32float v1, v32float v2)
 
v32accfloat negmul_elem_32 (v32float v1, v32float v2)
 
v32accfloat mac_elem_32 (v32float v1, v32float v2, v32accfloat acc)
 
v32accfloat msc_elem_32 (v32float v1, v32float v2, v32accfloat acc)
 
v32accfloat addmac_elem_32 (v32float v1, v32float v2, v32accfloat acc1, v32accfloat acc2)
 
v32accfloat addmsc_elem_32 (v32float v1, v32float v2, v32accfloat acc1, v32accfloat acc2)
 
v64accfloat mul_elem_64 (v64float v1, v64float v2)
 
v64accfloat negmul_elem_64 (v64float v1, v64float v2)
 
v64accfloat mac_elem_64 (v64float v1, v64float v2, v64accfloat acc)
 
v64accfloat msc_elem_64 (v64float v1, v64float v2, v64accfloat acc)
 
v64accfloat addmac_elem_64 (v64float v1, v64float v2, v64accfloat acc1, v64accfloat acc2)
 
v64accfloat addmsc_elem_64 (v64float v1, v64float v2, v64accfloat acc1, v64accfloat acc2)
 
v32accfloat mul_4x8_8x8 (v32float v1, v64float v2)
 
v32accfloat negmul_4x8_8x8 (v32float v1, v64float v2)
 
v32accfloat mac_4x8_8x8 (v32float v1, v64float v2, v32accfloat acc)
 
v32accfloat msc_4x8_8x8 (v32float v1, v64float v2, v32accfloat acc)
 
v32accfloat addmac_4x8_8x8 (v32float v1, v64float v2, v32accfloat acc1, v32accfloat acc2)
 
v32accfloat addmsc_4x8_8x8 (v32float v1, v64float v2, v32accfloat acc1, v32accfloat acc2)
 

Emulated vector float operations with dynamic negation of multiplication result, zeroing of acc1, negation of acc1

v32accfloat mul_elem_32_conf (v32float v1, v32float v2, int sub_mul)
 
v32accfloat mac_elem_32_conf (v32float v1, v32float v2, v32accfloat acc, int zero_acc, int sub_mul, int sub_acc1)
 
v32accfloat msc_elem_32_conf (v32float v1, v32float v2, v32accfloat acc, int zero_acc, int sub_mul, int sub_acc1)
 
v32accfloat addmac_elem_32_conf (v32float v1, v32float v2, v32accfloat acc1, v32accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2)
 
v32accfloat addmsc_elem_32_conf (v32float v1, v32float v2, v32accfloat acc1, v32accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2)
 
v64accfloat mul_elem_64_conf (v64float v1, v64float v2, int sub_mul)
 
v64accfloat mac_elem_64_conf (v64float v1, v64float v2, v64accfloat acc, int zero_acc, int sub_mul, int sub_acc1)
 
v64accfloat msc_elem_64_conf (v64float v1, v64float v2, v64accfloat acc, int zero_acc, int sub_mul, int sub_acc1)
 
v64accfloat addmac_elem_64_conf (v64float v1, v64float v2, v64accfloat acc1, v64accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2)
 
v64accfloat addmsc_elem_64_conf (v64float v1, v64float v2, v64accfloat acc1, v64accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2)
 
v32accfloat mul_4x8_8x8_conf (v32float v1, v64float v2, int sub_mul)
 
v32accfloat mac_4x8_8x8_conf (v32float v1, v64float v2, v32accfloat acc, int zero_acc, int sub_mul, int sub_acc1)
 
v32accfloat msc_4x8_8x8_conf (v32float v1, v64float v2, v32accfloat acc, int zero_acc, int sub_mul, int sub_acc1)
 
v32accfloat addmac_4x8_8x8_conf (v32float v1, v64float v2, v32accfloat acc1, v32accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2)
 
v32accfloat addmsc_4x8_8x8_conf (v32float v1, v64float v2, v32accfloat acc1, v32accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2)
 

Detailed Description

Elementwise matrix multiplications emulated on top of bfloat16.

For an explanation of how these operations work and the different accuracies provided, please refer to Multiply Accumulate.

Function Documentation

◆ addmac_4x8_8x8()

v32accfloat addmac_4x8_8x8 ( v32float v1,
v64float v2,
v32accfloat acc1,
v32accfloat acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
Returns
Result of operation

◆ addmac_4x8_8x8_conf()

v32accfloat addmac_4x8_8x8_conf ( v32float v1,
v64float v2,
v32accfloat acc1,
v32accfloat acc2,
int zero_acc,
int sub_mul,
int sub_acc1,
int sub_acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
zero_accZeroing mask for acc1
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc1
sub_acc2Negation mask of acc2
Returns
Result of operation

◆ addmac_elem_32()

v32accfloat addmac_elem_32 ( v32float v1,
v32float v2,
v32accfloat acc1,
v32accfloat acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
Returns
Result of operation

◆ addmac_elem_32_conf()

v32accfloat addmac_elem_32_conf ( v32float v1,
v32float v2,
v32accfloat acc1,
v32accfloat acc2,
int zero_acc,
int sub_mul,
int sub_acc1,
int sub_acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
zero_accZeroing mask for acc1
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc1
sub_acc2Negation mask of acc2
Returns
Result of operation

◆ addmac_elem_64()

v64accfloat addmac_elem_64 ( v64float v1,
v64float v2,
v64accfloat acc1,
v64accfloat acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
Returns
Result of operation

◆ addmac_elem_64_conf()

v64accfloat addmac_elem_64_conf ( v64float v1,
v64float v2,
v64accfloat acc1,
v64accfloat acc2,
int zero_acc,
int sub_mul,
int sub_acc1,
int sub_acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
zero_accZeroing mask for acc1
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc1
sub_acc2Negation mask of acc2
Returns
Result of operation

◆ addmsc_4x8_8x8()

v32accfloat addmsc_4x8_8x8 ( v32float v1,
v64float v2,
v32accfloat acc1,
v32accfloat acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
Returns
Result of operation

◆ addmsc_4x8_8x8_conf()

v32accfloat addmsc_4x8_8x8_conf ( v32float v1,
v64float v2,
v32accfloat acc1,
v32accfloat acc2,
int zero_acc,
int sub_mul,
int sub_acc1,
int sub_acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
zero_accZeroing mask for acc1
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc1
sub_acc2Negation mask of acc2
Returns
Result of operation

◆ addmsc_elem_32()

v32accfloat addmsc_elem_32 ( v32float v1,
v32float v2,
v32accfloat acc1,
v32accfloat acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
Returns
Result of operation

◆ addmsc_elem_32_conf()

v32accfloat addmsc_elem_32_conf ( v32float v1,
v32float v2,
v32accfloat acc1,
v32accfloat acc2,
int zero_acc,
int sub_mul,
int sub_acc1,
int sub_acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
zero_accZeroing mask for acc1
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc1
sub_acc2Negation mask of acc2
Returns
Result of operation

◆ addmsc_elem_64()

v64accfloat addmsc_elem_64 ( v64float v1,
v64float v2,
v64accfloat acc1,
v64accfloat acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
Returns
Result of operation

◆ addmsc_elem_64_conf()

v64accfloat addmsc_elem_64_conf ( v64float v1,
v64float v2,
v64accfloat acc1,
v64accfloat acc2,
int zero_acc,
int sub_mul,
int sub_acc1,
int sub_acc2 )
Parameters
v1Matrix A
v2Matrix B
acc1Accumulator 1 input
acc2Accumulator 2 input
zero_accZeroing mask for acc1
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc1
sub_acc2Negation mask of acc2
Returns
Result of operation

◆ mac_4x8_8x8()

v32accfloat mac_4x8_8x8 ( v32float v1,
v64float v2,
v32accfloat acc )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
Returns
Result of operation

◆ mac_4x8_8x8_conf()

v32accfloat mac_4x8_8x8_conf ( v32float v1,
v64float v2,
v32accfloat acc,
int zero_acc,
int sub_mul,
int sub_acc1 )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
zero_accZeroing mask for acc
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc
Returns
Result of operation

◆ mac_elem_32()

v32accfloat mac_elem_32 ( v32float v1,
v32float v2,
v32accfloat acc )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
Returns
Result of operation

◆ mac_elem_32_conf()

v32accfloat mac_elem_32_conf ( v32float v1,
v32float v2,
v32accfloat acc,
int zero_acc,
int sub_mul,
int sub_acc1 )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
zero_accZeroing mask for acc
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc
Returns
Result of operation

◆ mac_elem_64()

v64accfloat mac_elem_64 ( v64float v1,
v64float v2,
v64accfloat acc )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
Returns
Result of operation

◆ mac_elem_64_conf()

v64accfloat mac_elem_64_conf ( v64float v1,
v64float v2,
v64accfloat acc,
int zero_acc,
int sub_mul,
int sub_acc1 )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
zero_accZeroing mask for acc
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc
Returns
Result of operation

◆ msc_4x8_8x8()

v32accfloat msc_4x8_8x8 ( v32float v1,
v64float v2,
v32accfloat acc )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
Returns
Result of operation

◆ msc_4x8_8x8_conf()

v32accfloat msc_4x8_8x8_conf ( v32float v1,
v64float v2,
v32accfloat acc,
int zero_acc,
int sub_mul,
int sub_acc1 )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
zero_accZeroing mask for acc
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc
Returns
Result of operation

◆ msc_elem_32()

v32accfloat msc_elem_32 ( v32float v1,
v32float v2,
v32accfloat acc )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
Returns
Result of operation

◆ msc_elem_32_conf()

v32accfloat msc_elem_32_conf ( v32float v1,
v32float v2,
v32accfloat acc,
int zero_acc,
int sub_mul,
int sub_acc1 )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
zero_accZeroing mask for acc
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc
Returns
Result of operation

◆ msc_elem_64()

v64accfloat msc_elem_64 ( v64float v1,
v64float v2,
v64accfloat acc )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
Returns
Result of operation

◆ msc_elem_64_conf()

v64accfloat msc_elem_64_conf ( v64float v1,
v64float v2,
v64accfloat acc,
int zero_acc,
int sub_mul,
int sub_acc1 )
Parameters
v1Matrix A
v2Matrix B
accAccumulator 1 input
zero_accZeroing mask for acc
sub_mulNegation mask for multiplication result
sub_acc1Negation mask of acc
Returns
Result of operation

◆ mul_4x8_8x8()

v32accfloat mul_4x8_8x8 ( v32float v1,
v64float v2 )
Parameters
v1Matrix A
v2Matrix B
Returns
Result of operation

◆ mul_4x8_8x8_conf()

v32accfloat mul_4x8_8x8_conf ( v32float v1,
v64float v2,
int sub_mul )
Parameters
v1Matrix A
v2Matrix B
sub_mulNegation mask for multiplication result
Returns
Result of operation

◆ mul_elem_32()

v32accfloat mul_elem_32 ( v32float v1,
v32float v2 )
Parameters
v1Matrix A
v2Matrix B
Returns
Result of operation

◆ mul_elem_32_conf()

v32accfloat mul_elem_32_conf ( v32float v1,
v32float v2,
int sub_mul )
Parameters
v1Matrix A
v2Matrix B
sub_mulNegation mask for multiplication result
Returns
Result of operation

◆ mul_elem_64()

v64accfloat mul_elem_64 ( v64float v1,
v64float v2 )
Parameters
v1Matrix A
v2Matrix B
Returns
Result of operation

◆ mul_elem_64_conf()

v64accfloat mul_elem_64_conf ( v64float v1,
v64float v2,
int sub_mul )
Parameters
v1Matrix A
v2Matrix B
sub_mulNegation mask for multiplication result
Returns
Result of operation

◆ negmul_4x8_8x8()

v32accfloat negmul_4x8_8x8 ( v32float v1,
v64float v2 )
Parameters
v1Matrix A
v2Matrix B
Returns
Result of operation

◆ negmul_elem_32()

v32accfloat negmul_elem_32 ( v32float v1,
v32float v2 )
Parameters
v1Matrix A
v2Matrix B
Returns
Result of operation

◆ negmul_elem_64()

v64accfloat negmul_elem_64 ( v64float v1,
v64float v2 )
Parameters
v1Matrix A
v2Matrix B
Returns
Result of operation