Intel® C++ Compiler Classic Developer Guide and Reference

ID 767249
Date 7/13/2023
Public
Document Table of Contents

Miscellaneous Intrinsics

Intel® Streaming SIMD Extensions 2 (Intel® SSE2) intrinsics for miscellaneous operations are listed in the following table followed by descriptions.

The prototypes for Intel® SSE2 intrinsics are in the emmintrin.h header file.

To use these intrinsics, include the immintrin.h file as follows:

#include <immintrin.h>

Intrinsic

Operation

Corresponding Intel® SSE2 Instruction

_mm_packs_epi16

Packed Saturation

PACKSSWB

_mm_packs_epi32

Packed Saturation

PACKSSDW

_mm_packus_epi16

Packed Saturation

PACKUSWB

_mm_extract_epi16

Extraction

PEXTRW

_mm_insert_epi16

Insertion

PINSRW

_mm_movemask_epi8

Mask Creation

PMOVMSKB

_mm_shuffle_epi32

Shuffle

PSHUFD

_mm_shufflehi_epi16

Shuffle

PSHUFHW

_mm_shufflelo_epi16

Shuffle

PSHUFLW

_mm_unpackhi_epi8

Interleave

PUNPCKHBW

_mm_unpackhi_epi16

Interleave

PUNPCKHWD

_mm_unpackhi_epi32

Interleave

PUNPCKHDQ

_mm_unpackhi_epi64

Interleave

PUNPCKHQDQ

_mm_unpacklo_epi8

Interleave

PUNPCKLBW

_mm_unpacklo_epi16

Interleave

PUNPCKLWD

_mm_unpacklo_epi32

Interleave

PUNPCKLDQ

_mm_unpacklo_epi64

Interleave

PUNPCKLQDQ

_mm_movepi64_pi64

Move

MOVDQ2Q

_mm_movpi64_epi64

Move

MOVQ2DQ

_mm_move_epi64

Move

MOVQ

_mm_unpackhi_pd

Interleave

UNPCKHPD

_mm_unpacklo_pd

Interleave

UNPCKLPD

_mm_movemask_pd

Create mask

MOVMSKPD

_mm_shuffle_pd

Select values

SHUFPD

_mm_packs_epi16

__m128i _mm_packs_epi16(__m128i a, __m128i b);

Packs the 16 signed 16-bit integers from a and b into signed 8-bit integers and saturates.

R0

...

R7

R8

...

R15

Signed Saturate(a0)

...

Signed Saturate(a7)

Signed Saturate(b0)

...

Signed Saturate(b7)

_mm_packs_epi32

__m128i _mm_packs_epi32(__m128i a, __m128i b);

Packs the eight signed 32-bit integers from a and b into signed 16-bit integers and saturates.

R0

...

R3

R4

...

R7

Signed Saturate(a0)

...

Signed Saturate(a3)

Signed Saturate(b0)

...

Signed Saturate(b3)

_mm_packus_epi16

__m128i _mm_packus_epi16(__m128i a, __m128i b);

Packs the 16 signed 16-bit integers from a and b into 8-bit unsigned integers and saturates.

R0

...

R7

R8

...

R15

Unsigned Saturate(a0)

...

Unsigned Saturate(a7)

Unsigned Saturate(b0)

...

Unsigned Saturate(b7)

_mm_extract_epi16

int _mm_extract_epi16(__m128i a, int imm);

Extracts the selected signed or unsigned 16-bit integer from a and zero extends. The selector imm must be an immediate.

R0

(imm == 0) ? a0: ( (imm == 1) ? a1: ... (imm==7) ? a7)

_mm_insert_epi16

__m128i _mm_insert_epi16(__m128i a, int b, int imm);

Inserts the least significant 16 bits of b into the selected 16-bit integer of a. The selector imm must be an immediate.

R0

R1

...

R7

(imm == 0) ? b : a0;

(imm == 1) ? b : a1;

...

(imm == 7) ? b : a7;

_mm_movemask_epi8

int _mm_movemask_epi8(__m128i a);

Creates a 16-bit mask from the most significant bits of the 16 signed or unsigned 8-bit integers in a and zero extends the upper bits.

R0

a15[7] << 15 | a14[7] << 14 | ... a1[7] << 1 | a0[7]

_mm_shuffle_epi32

__m128i _mm_shuffle_epi32(__m128i a, int imm);

Shuffles the four signed or unsigned 32-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.

_mm_shufflehi_epi16

__m128i _mm_shufflehi_epi16(__m128i a, int imm);

Shuffles the upper four signed or unsigned 16-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.

_mm_shufflelo_epi16

__m128i _mm_shufflelo_epi16(__m128i a, int imm);

Shuffles the lower four signed or unsigned 16-bit integers in a as specified by imm. The shuffle value, imm, must be an immediate. See Macro Function for Shuffle for a description of shuffle semantics.

_mm_unpackhi_epi8

__m128i _mm_unpackhi_epi8(__m128i a, __m128i b);

Interleaves the upper eight signed or unsigned 8-bit integers in a with the upper eight signed or unsigned 8-bit integers in b.

R0

R1

R2

R3

...

R14

R15

a8

b8

a9

b9

...

a15

b15

_mm_unpackhi_epi16

__m128i _mm_unpackhi_epi16(__m128i a, __m128i b);

Interleaves the upper four signed or unsigned 16-bit integers in a with the upper four signed or unsigned 16-bit integers in b.

R0

R1

R2

R3

R4

R5

R6

R7

a4

b4

a5

b5

a6

b6

a7

b7

_mm_unpackhi_epi32

__m128i _mm_unpackhi_epi32(__m128i a, __m128i b);

Interleaves the upper two signed or unsigned 32-bit integers in a with the upper two signed or unsigned 32-bit integers in b.

R0

R1

R2

R3

a2

b2

a3

b3

_mm_unpackhi_epi64

__m128i _mm_unpackhi_epi64(__m128i a, __m128i b);

Interleaves the upper signed or unsigned 64-bit integer in a with the upper signed or unsigned 64-bit integer in b.

R0

R1

a1

b1

_mm_unpacklo_epi8

__m128i _mm_unpacklo_epi8(__m128i a, __m128i b);

Interleaves the lower eight signed or unsigned 8-bit integers in a with the lower eight signed or unsigned 8-bit integers in b.

R0

R1

R2

R3

...

R14

R15

a0

b0

a1

b1

...

a7

b7

_mm_unpacklo_epi16

__m128i _mm_unpacklo_epi16(__m128i a, __m128i b);

Interleaves the lower four signed or unsigned 16-bit integers in a with the lower four signed or unsigned 16-bit integers in b.

R0

R1

R2

R3

R4

R5

R6

R7

a0

b0

a1

b1

a2

b2

a3

b3

_mm_unpacklo_epi32

__m128i _mm_unpacklo_epi32(__m128i a, __m128i b);

Interleaves the lower two signed or unsigned 32-bit integers in a with the lower two signed or unsigned 32-bit integers in b.

R0

R1

R2

R3

a0

b0

a1

b1

_mm_unpacklo_epi64

__m128i _mm_unpacklo_epi64(__m128i a, __m128i b);

Interleaves the lower signed or unsigned 64-bit integer in a with the lower signed or unsigned 64-bit integer in b.

R0

R1

a0

b0

_mm_movepi64_pi64

__m64 _mm_movepi64_pi64(__m128i a);

Returns the lower 64 bits of a as an __m64 type.

R0

a0

_mm_movpi64_epi64

__m128i _mm_movpi64_epi64(__m64 a);

Moves the 64 bits of a to the lower 64 bits of the result, zeroing the upper bits.

R0

R1

a0

0x0

_mm_move_epi64

__m128i _mm_move_epi64(__m128i a);

Moves the lower 64 bits of a to the lower 64 bits of the result, zeroing the upper bits.

R0

R1

a0

0x0

_mm_unpackhi_pd

__m128d _mm_unpackhi_pd(__m128d a, __m128d b);

Interleaves the upper DP FP values of a and b.

R0

R1

a1

b1

_mm_unpacklo_pd

__m128d _mm_unpacklo_pd(__m128d a, __m128d b);

Interleaves the lower DP FP values of a and b.

R0

R1

a0

b0

_mm_movemask_pd

int _mm_movemask_pd(__m128d a);

Creates a two-bit mask from the sign bits of the two DP FP values of a.

R

sign(a1) << 1 | sign(a0)

_mm_shuffle_pd

__m128d _mm_shuffle_pd(__m128d a, __m128d b, int i);

Selects two specific DP FP values from a and b, based on the mask i. The mask must be an immediate. See Macro Function for Shuffle for a description of the shuffle semantics.