// BT.cpp : Test bit intrinsics
#include <intrin.h>
// Every call in this file uses the 32-bit _bittestandcomplement, on all
// targets, so that form is requested unconditionally. The 64-bit variant
// only exists on x64 and is therefore requested under the _M_X64 guard.
#pragma intrinsic(_bittestandcomplement)
#if defined(_M_X64)
#pragma intrinsic(_bittestandcomplement64)
#endif
// Shared word whose bits the test functions below complement; main() returns it.
int global_static_integer;
// "Function style" bit complement: takes the word by value and returns the
// updated copy, so the caller's variable is only changed if it assigns the
// result back.
//
//   mask  word to operate on (passed by value)
//   i     index of the bit to complement
//   returns `mask` with bit `i` complemented
//
// The intrinsic's own return value (the previous state of the bit) is ignored.
// NOTE(review): presumably `i` is expected to stay within 0..31 — confirm.
__inline int btc_func_style(int mask, int i)
{
    long* const word = (long*) &mask;
    _bittestandcomplement(word, i);
    return mask;
}
// "Memory style" bit complement: complements bit `i` directly in the word
// that `mask` points to.
//
//   mask  address of the word to modify in place
//   i     index of the bit to complement
//
// The intrinsic's own return value (the previous state of the bit) is ignored.
// NOTE(review): presumably `i` is expected to stay within 0..31 — confirm.
__inline void btc_mem_style(int* mask, int i)
{
    long* const word = (long*) mask;
    _bittestandcomplement(word, i);
}
// Test case 1: complement bit `i` of global_static_integer through the
// pointer-taking helper. Declared noinline so the function is emitted as a
// separate procedure.
__declspec(noinline) void test_mem_style(int i)
{
    int* const target = &global_static_integer;
    btc_mem_style(target, i);
}
// Test case 2: complement bit `i` of global_static_integer through the
// value-returning helper, then store the result back. Declared noinline so
// the function is emitted as a separate procedure.
__declspec(noinline) void test_func_style(int i)
{
    const int toggled = btc_func_style(global_static_integer, i);
    global_static_integer = toggled;
}
// Driver: runs both test cases with bit indices derived from argc and
// returns the resulting global as the process exit code. argv is unused.
int main(int argc, char* argv[])
{
    const int func_bit = argc;
    const int mem_bit = func_bit + 1;

    test_mem_style(mem_bit);
    test_func_style(func_bit);

    return global_static_integer;
}
Here is the output of these two simple test functions:
; mark_description "Intel C++ Compiler for applications running on IA-32, Version 12.1.3.300 Build 20120130";
; Compiler listing for test_mem_style(int i): complements bit i of
; global_static_integer directly in memory. The trailing ";NN.N" fields are
; the compiler's own annotations and are kept as emitted.
?test_mem_style@@YAXH@Z PROC NEAR PRIVATE
; parameter 1(i): eax
; 12 bytes of stack are reserved here and released before ret; no instruction
; in between addresses them.
sub esp, 12 ;27.1
mov edx, OFFSET FLAT: ?global_static_integer@@3HA ;28.3
; btc complements the bit selected by eax and leaves its previous value in CF.
btc DWORD PTR [edx], eax ;28.3
; al captures CF, but nothing reads al before the procedure returns.
setb al ;28.3
add esp, 12 ;29.1
ret ;29.1
?test_mem_style@@YAXH@Z ENDP
; Compiler listing for test_func_style(int i): copies global_static_integer to
; a stack slot, complements bit i of that slot, then writes the slot back to
; the global. The trailing ";NN.N" fields are the compiler's own annotations
; and are kept as emitted.
?test_func_style@@YAXH@Z PROC NEAR PRIVATE
; parameter 1(i): eax
sub esp, 12 ;33.1
; Load the global and spill it to [esp]; edx holds the address of that slot.
mov ecx, DWORD PTR [?global_static_integer@@3HA] ;34.3
lea edx, DWORD PTR [esp] ;34.3
mov DWORD PTR [esp], ecx ;34.3
; btc complements the bit selected by eax in the stack copy (old bit -> CF).
btc DWORD PTR [edx], eax ;34.3
; al captures CF, but the very next instruction overwrites eax.
setb al ;34.3
; Reload the modified copy and store it back to the global.
mov eax, DWORD PTR [esp] ;34.3
mov DWORD PTR [?global_static_integer@@3HA], eax ;34.3
add esp, 12 ;35.1
$LN51:
ret ;35.1
?test_func_style@@YAXH@Z ENDP
As you can easily see, the generated code is far from optimal. The compiler emits several useless stack memory reads and writes, stack pointer adjustments, and a setb instruction for no obvious reason. Could somebody forward this message to the compiler development group as an improvement request? Thanks! The same problem exists for the BTR and BTS intrinsics too, and the same issue has also been confirmed on x64.




