Intel® C++ Compiler Classic Developer Guide and Reference

ID 767249
Date 7/13/2023
Public
Document Table of Contents

Intrinsics for Integer Permutation Operations

The prototypes for Intel® Advanced Vector Extensions 512 (Intel® AVX-512) intrinsics are located in the zmmintrin.h header file.

To use these intrinsics, include the immintrin.h file as follows:

#include <immintrin.h>


Intrinsic Name

Operation

Corresponding
Intel® AVX-512 Instruction

_mm512_permutex2var_epi32, _mm512_mask_permutex2var_epi32, _mm512_mask2_permutex2var_epi32, _mm512_maskz_permutex2var_epi32

Shuffle int32 elements across lanes.

VPERMI2D, VPERMT2D

_mm512_permutex2var_epi64, _mm512_mask_permutex2var_epi64, _mm512_mask2_permutex2var_epi64, _mm512_maskz_permutex2var_epi64

Shuffle int64 elements across lanes.

VPERMI2Q, VPERMT2Q

_mm512_permutevar_epi32, _mm512_mask_permutevar_epi32

_mm512_permutexvar_epi32, _mm512_mask_permutexvar_epi32, _mm512_maskz_permutexvar_epi32

Shuffle int32 elements across lanes.

VPERMD

_mm512_permutex_epi64, _mm512_mask_permutex_epi64, _mm512_maskz_permutex_epi64

_mm512_permutexvar_epi64, _mm512_mask_permutexvar_epi64, _mm512_maskz_permutexvar_epi64

Shuffle int64 elements across lanes.

VPERMQ


variable definition
k

writemask used as a selector

a

first source vector element

src

source element to use based on writemask result

idx

integer vector containing the element indices (int32 or int64 elements, matching the element size of the intrinsic)


_mm512_permutevar_epi32

extern __m512i __cdecl _mm512_permutevar_epi32(__m512i idx, __m512i a);

Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result.

NOTE:

This intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.


_mm512_mask_permutevar_epi32

extern __m512i __cdecl _mm512_mask_permutevar_epi32(__m512i src, __mmask16 k, __m512i idx, __m512i a);

Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).

NOTE:

This intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.



_mm512_permutexvar_epi32

extern __m512i __cdecl _mm512_permutexvar_epi32(__m512i idx, __m512i a);

Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result.


_mm512_mask_permutexvar_epi32

extern __m512i __cdecl _mm512_mask_permutexvar_epi32(__m512i src, __mmask16 k, __m512i idx, __m512i a);

Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).


_mm512_maskz_permutexvar_epi32

extern __m512i __cdecl _mm512_maskz_permutexvar_epi32(__mmask16 k, __m512i idx, __m512i a);

Shuffles int32 elements in a across lanes using the corresponding index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm512_permutex2var_epi32

extern __m512i __cdecl _mm512_permutex2var_epi32(__m512i a, __m512i idx, __m512i b);

Shuffles int32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result.


_mm512_mask_permutex2var_epi32

extern __m512i __cdecl _mm512_mask_permutex2var_epi32(__m512i a, __mmask16 k, __m512i idx, __m512i b);

Shuffles int32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from a when the corresponding mask bit is not set).


_mm512_mask2_permutex2var_epi32

extern __m512i __cdecl _mm512_mask2_permutex2var_epi32(__m512i a, __m512i idx, __mmask16 k, __m512i b);

Shuffles int32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from idx when the corresponding mask bit is not set).



_mm512_maskz_permutex2var_epi32

extern __m512i __cdecl _mm512_maskz_permutex2var_epi32(__mmask16 k, __m512i a, __m512i idx, __m512i b);

Shuffles int32 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).




_mm512_permutex2var_epi64

extern __m512i __cdecl _mm512_permutex2var_epi64(__m512i a, __m512i idx, __m512i b);

Shuffles int64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result.


_mm512_mask_permutex2var_epi64

extern __m512i __cdecl _mm512_mask_permutex2var_epi64(__m512i a, __mmask8 k, __m512i idx, __m512i b);

Shuffles int64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from a when the corresponding mask bit is not set).


_mm512_mask2_permutex2var_epi64

extern __m512i __cdecl _mm512_mask2_permutex2var_epi64(__m512i a, __m512i idx, __mmask8 k, __m512i b);

Shuffles int64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using writemask k (elements are copied from idx when the corresponding mask bit is not set).


_mm512_maskz_permutex2var_epi64

extern __m512i __cdecl _mm512_maskz_permutex2var_epi64(__mmask8 k, __m512i a, __m512i idx, __m512i b);

Shuffles int64 elements in a and b across lanes using the corresponding selector and index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm512_permutex_epi64

extern __m512i __cdecl _mm512_permutex_epi64(__m512i a, const int imm);

Shuffles int64 elements in a within 256-bit lanes using the control in imm, and stores the result.


_mm512_mask_permutex_epi64

extern __m512i __cdecl _mm512_mask_permutex_epi64(__m512i src, __mmask8 k, __m512i a, const int imm);

Shuffles int64 elements in a within 256-bit lanes using the control in imm, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).


_mm512_maskz_permutex_epi64

extern __m512i __cdecl _mm512_maskz_permutex_epi64(__mmask8 k, __m512i a, const int imm);

Shuffles int64 elements in a within 256-bit lanes using the control in imm, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm512_permutexvar_epi64

extern __m512i __cdecl _mm512_permutexvar_epi64(__m512i idx, __m512i a);

Shuffles int64 elements in a across lanes using the corresponding index in idx, and stores the result.


_mm512_mask_permutexvar_epi64

extern __m512i __cdecl _mm512_mask_permutexvar_epi64(__m512i src, __mmask8 k, __m512i idx, __m512i a);

Shuffles int64 elements in a across lanes using the corresponding index in idx, and stores the result using writemask k (elements are copied from src when the corresponding mask bit is not set).


_mm512_maskz_permutexvar_epi64

extern __m512i __cdecl _mm512_maskz_permutexvar_epi64(__mmask8 k, __m512i idx, __m512i a);

Shuffles int64 elements in a across lanes using the corresponding index in idx, and stores the result using zeromask k (elements are zeroed out when the corresponding mask bit is not set).