![]() |
AI Engine-ML v2 Intrinsics User Guide
v2025.1
|
Elementwise matrix multiplications emulated on top of bfloat16. More...
Emulated vector float operations with dynamic negation of multiplication result, zeroing of acc1, negation of acc1 | |
| v32accfloat | mul_elem_32_conf (v32float v1, v32float v2, int sub_mul) |
| v32accfloat | mac_elem_32_conf (v32float v1, v32float v2, v32accfloat acc, int zero_acc, int sub_mul, int sub_acc1) |
| v32accfloat | msc_elem_32_conf (v32float v1, v32float v2, v32accfloat acc, int zero_acc, int sub_mul, int sub_acc1) |
| v32accfloat | addmac_elem_32_conf (v32float v1, v32float v2, v32accfloat acc1, v32accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2) |
| v32accfloat | addmsc_elem_32_conf (v32float v1, v32float v2, v32accfloat acc1, v32accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2) |
| v64accfloat | mul_elem_64_conf (v64float v1, v64float v2, int sub_mul) |
| v64accfloat | mac_elem_64_conf (v64float v1, v64float v2, v64accfloat acc, int zero_acc, int sub_mul, int sub_acc1) |
| v64accfloat | msc_elem_64_conf (v64float v1, v64float v2, v64accfloat acc, int zero_acc, int sub_mul, int sub_acc1) |
| v64accfloat | addmac_elem_64_conf (v64float v1, v64float v2, v64accfloat acc1, v64accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2) |
| v64accfloat | addmsc_elem_64_conf (v64float v1, v64float v2, v64accfloat acc1, v64accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2) |
| v32accfloat | mul_4x8_8x8_conf (v32float v1, v64float v2, int sub_mul) |
| v32accfloat | mac_4x8_8x8_conf (v32float v1, v64float v2, v32accfloat acc, int zero_acc, int sub_mul, int sub_acc1) |
| v32accfloat | msc_4x8_8x8_conf (v32float v1, v64float v2, v32accfloat acc, int zero_acc, int sub_mul, int sub_acc1) |
| v32accfloat | addmac_4x8_8x8_conf (v32float v1, v64float v2, v32accfloat acc1, v32accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2) |
| v32accfloat | addmsc_4x8_8x8_conf (v32float v1, v64float v2, v32accfloat acc1, v32accfloat acc2, int zero_acc, int sub_mul, int sub_acc1, int sub_acc2) |
Elementwise matrix multiplications emulated on top of bfloat16.
For an explanation of how these operations work and the different accuracies provided, please refer to Multiply Accumulate.
| v32accfloat addmac_4x8_8x8 | ( | v32float | v1, |
| v64float | v2, | ||
| v32accfloat | acc1, | ||
| v32accfloat | acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| v32accfloat addmac_4x8_8x8_conf | ( | v32float | v1, |
| v64float | v2, | ||
| v32accfloat | acc1, | ||
| v32accfloat | acc2, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1, | ||
| int | sub_acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| zero_acc | Zeroing mask for acc1 |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc1 |
| sub_acc2 | Negation mask of acc2 |
| v32accfloat addmac_elem_32 | ( | v32float | v1, |
| v32float | v2, | ||
| v32accfloat | acc1, | ||
| v32accfloat | acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| v32accfloat addmac_elem_32_conf | ( | v32float | v1, |
| v32float | v2, | ||
| v32accfloat | acc1, | ||
| v32accfloat | acc2, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1, | ||
| int | sub_acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| zero_acc | Zeroing mask for acc1 |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc1 |
| sub_acc2 | Negation mask of acc2 |
| v64accfloat addmac_elem_64 | ( | v64float | v1, |
| v64float | v2, | ||
| v64accfloat | acc1, | ||
| v64accfloat | acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| v64accfloat addmac_elem_64_conf | ( | v64float | v1, |
| v64float | v2, | ||
| v64accfloat | acc1, | ||
| v64accfloat | acc2, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1, | ||
| int | sub_acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| zero_acc | Zeroing mask for acc1 |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc1 |
| sub_acc2 | Negation mask of acc2 |
| v32accfloat addmsc_4x8_8x8 | ( | v32float | v1, |
| v64float | v2, | ||
| v32accfloat | acc1, | ||
| v32accfloat | acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| v32accfloat addmsc_4x8_8x8_conf | ( | v32float | v1, |
| v64float | v2, | ||
| v32accfloat | acc1, | ||
| v32accfloat | acc2, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1, | ||
| int | sub_acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| zero_acc | Zeroing mask for acc1 |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc1 |
| sub_acc2 | Negation mask of acc2 |
| v32accfloat addmsc_elem_32 | ( | v32float | v1, |
| v32float | v2, | ||
| v32accfloat | acc1, | ||
| v32accfloat | acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| v32accfloat addmsc_elem_32_conf | ( | v32float | v1, |
| v32float | v2, | ||
| v32accfloat | acc1, | ||
| v32accfloat | acc2, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1, | ||
| int | sub_acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| zero_acc | Zeroing mask for acc1 |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc1 |
| sub_acc2 | Negation mask of acc2 |
| v64accfloat addmsc_elem_64 | ( | v64float | v1, |
| v64float | v2, | ||
| v64accfloat | acc1, | ||
| v64accfloat | acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| v64accfloat addmsc_elem_64_conf | ( | v64float | v1, |
| v64float | v2, | ||
| v64accfloat | acc1, | ||
| v64accfloat | acc2, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1, | ||
| int | sub_acc2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc1 | Accumulator 1 input |
| acc2 | Accumulator 2 input |
| zero_acc | Zeroing mask for acc1 |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc1 |
| sub_acc2 | Negation mask of acc2 |
| v32accfloat mac_4x8_8x8 | ( | v32float | v1, |
| v64float | v2, | ||
| v32accfloat | acc ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| v32accfloat mac_4x8_8x8_conf | ( | v32float | v1, |
| v64float | v2, | ||
| v32accfloat | acc, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| zero_acc | Zeroing mask for acc |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc |
| v32accfloat mac_elem_32 | ( | v32float | v1, |
| v32float | v2, | ||
| v32accfloat | acc ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| v32accfloat mac_elem_32_conf | ( | v32float | v1, |
| v32float | v2, | ||
| v32accfloat | acc, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| zero_acc | Zeroing mask for acc |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc |
| v64accfloat mac_elem_64 | ( | v64float | v1, |
| v64float | v2, | ||
| v64accfloat | acc ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| v64accfloat mac_elem_64_conf | ( | v64float | v1, |
| v64float | v2, | ||
| v64accfloat | acc, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| zero_acc | Zeroing mask for acc |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc |
| v32accfloat msc_4x8_8x8 | ( | v32float | v1, |
| v64float | v2, | ||
| v32accfloat | acc ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| v32accfloat msc_4x8_8x8_conf | ( | v32float | v1, |
| v64float | v2, | ||
| v32accfloat | acc, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| zero_acc | Zeroing mask for acc |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc |
| v32accfloat msc_elem_32 | ( | v32float | v1, |
| v32float | v2, | ||
| v32accfloat | acc ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| v32accfloat msc_elem_32_conf | ( | v32float | v1, |
| v32float | v2, | ||
| v32accfloat | acc, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| zero_acc | Zeroing mask for acc |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc |
| v64accfloat msc_elem_64 | ( | v64float | v1, |
| v64float | v2, | ||
| v64accfloat | acc ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| v64accfloat msc_elem_64_conf | ( | v64float | v1, |
| v64float | v2, | ||
| v64accfloat | acc, | ||
| int | zero_acc, | ||
| int | sub_mul, | ||
| int | sub_acc1 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| acc | Accumulator 1 input |
| zero_acc | Zeroing mask for acc |
| sub_mul | Negation mask for multiplication result |
| sub_acc1 | Negation mask of acc |
| v32accfloat mul_4x8_8x8 | ( | v32float | v1, |
| v64float | v2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| v32accfloat mul_4x8_8x8_conf | ( | v32float | v1, |
| v64float | v2, | ||
| int | sub_mul ) |
| v1 | Matrix A |
| v2 | Matrix B |
| sub_mul | Negation mask for multiplication result |
| v32accfloat mul_elem_32 | ( | v32float | v1, |
| v32float | v2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| v32accfloat mul_elem_32_conf | ( | v32float | v1, |
| v32float | v2, | ||
| int | sub_mul ) |
| v1 | Matrix A |
| v2 | Matrix B |
| sub_mul | Negation mask for multiplication result |
| v64accfloat mul_elem_64 | ( | v64float | v1, |
| v64float | v2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| v64accfloat mul_elem_64_conf | ( | v64float | v1, |
| v64float | v2, | ||
| int | sub_mul ) |
| v1 | Matrix A |
| v2 | Matrix B |
| sub_mul | Negation mask for multiplication result |
| v32accfloat negmul_4x8_8x8 | ( | v32float | v1, |
| v64float | v2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| v32accfloat negmul_elem_32 | ( | v32float | v1, |
| v32float | v2 ) |
| v1 | Matrix A |
| v2 | Matrix B |
| v64accfloat negmul_elem_64 | ( | v64float | v1, |
| v64float | v2 ) |
| v1 | Matrix A |
| v2 | Matrix B |