iaca: compiler messes with start/stop annotations

iaca: compiler messes with start/stop annotations

Hello,

I've annotated my code as follows:

/*
 * 8-byte read hook, instrumented for IACA analysis.
 *
 * The whole body is bracketed by IACA_START / IACA_END so that the work done
 * by MemoryRead() is intended to fall inside the analyzed region.  The empty
 * asm statements with a "memory" clobber are compiler-level barriers placed
 * just inside the markers.
 *
 * NOTE(review): the "memory" clobber only constrains memory accesses; it does
 * not appear to stop the compiler from laying out inlined code from
 * MemoryRead() outside the marked region -- confirm against the generated
 * assembly.
 */
void __tsan_read8(void *addr) {
  /* Begin of the region IACA should analyze. */
  IACA_START;
  /* Compiler barrier: keep memory accesses from being hoisted above the start marker. */
  __asm__ __volatile__(""::: "memory");
  /* The code under analysis: record a read of addr with size code kSizeLog8. */
  MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
  /* Compiler barrier: keep memory accesses from being sunk below the end marker. */
  __asm__ __volatile__(""::: "memory");
  /* End of the region IACA should analyze. */
  IACA_END;
}

However, the compiler generates the following code:

00000000001b7040 <__tsan_read8>:
  1b7040:       53                      push   %rbx
  1b7041:       0f 0b                   ud2    
  1b7043:       bb 6f 00 00 00          mov    $0x6f,%ebx
  1b7048:       64                      fs
  1b7049:       67 90                   addr32 nop
  1b704b:       48 b8 f8 ff ff ff ff    movabs $0xffff83fffffffff8,%rax
  1b7052:       83 ff ff 
  1b7055:       48 ba 00 00 00 00 00    movabs $0x20000000000,%rdx
  1b705c:       02 00 00 
  1b705f:       4c 8b 4c 24 08          mov    0x8(%rsp),%r9
  1b7064:       48 21 f8                and    %rdi,%rax
  1b7067:       48 31 d0                xor    %rdx,%rax
  1b706a:       48 83 3c 85 00 00 00    cmpq   $0xffffffffffffffff,0x0(,%rax,4)
  1b7071:       00 ff 
  1b7073:       4c 8d 04 85 00 00 00    lea    0x0(,%rax,4),%r8
  1b707a:       00 
  1b707b:       74 0e                   je     1b708b <__tsan_read8+0x4b>
  1b707d:       64 48 8b 14 25 c0 73    mov    %fs:0xfffffffffff973c0,%rdx
  1b7084:       f9 ff 
  1b7086:       48 85 d2                test   %rdx,%rdx
  1b7089:       79 15                   jns    1b70a0 <__tsan_read8+0x60>
  1b708b:       bb de 00 00 00          mov    $0xde,%ebx
  1b7090:       64                      fs
  1b7091:       67 90                   addr32 nop
  1b7093:       0f 0b                   ud2    
  1b7095:       5b                      pop    %rbx
  1b7096:       c3                      retq   
  1b7097:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
  1b709e:       00 00 
  1b70a0:       83 e7 07                and    $0x7,%edi
  1b70a3:       48 be ff ff ff ff ff    movabs $0xffffe3ffffffffff,%rsi
  1b70aa:       e3 ff ff 
  1b70ad:       48 b9 00 00 00 00 00    movabs $0x800000000000,%rcx
  1b70b4:       80 00 00 
  1b70b7:       48 21 d6                and    %rdx,%rsi
  1b70ba:       48 83 cf 18             or     $0x18,%rdi
  1b70be:       48 09 ce                or     %rcx,%rsi
  1b70c1:       48 c1 e7 2a             shl    $0x2a,%rdi
  1b70c5:       48 09 fe                or     %rdi,%rsi
  1b70c8:       0f 28 14 85 00 00 00    movaps 0x0(,%rax,4),%xmm2
  1b70cf:       00 
  1b70d0:       48 89 74 24 f8          mov    %rsi,-0x8(%rsp)
  1b70d5:       64 48 8b 3c 25 00 00    mov    %fs:0x0,%rdi
  1b70dc:       00 00 
  1b70de:       f3 0f 7e 6c 24 f8       movq   -0x8(%rsp),%xmm5
  1b70e4:       0f 28 e2                movaps %xmm2,%xmm4
  1b70e7:       f3 0f 7e c5             movq   %xmm5,%xmm0
  1b70eb:       0f 28 0c 85 10 00 00    movaps 0x10(,%rax,4),%xmm1
  1b70f2:       00 
  1b70f3:       0f 28 d8                movaps %xmm0,%xmm3
  1b70f6:       0f c6 e1 dd             shufps $0xdd,%xmm1,%xmm4
  1b70fa:       0f c6 d8 55             shufps $0x55,%xmm0,%xmm3
  1b70fe:       f3 0f 10 05 6a 13 05    movss  0x5136a(%rip),%xmm0        # 208470 <_ZN6__tsan8MutexSet8kMaxSizeE+0xfd8>
  1b7105:       00 
  1b7106:       0f c6 d1 88             shufps $0x88,%xmm1,%xmm2
  1b710a:       0f c6 c0 00             shufps $0x0,%xmm0,%xmm0
  1b710e:       66 0f eb c4             por    %xmm4,%xmm0
  1b7112:       66 0f 76 c3             pcmpeqd %xmm3,%xmm0
  1b7116:       f3 0f 7e 9f c8 73 f9    movq   -0x68c38(%rdi),%xmm3
  1b711d:       ff 
  1b711e:       0f c6 db 00             shufps $0x0,%xmm3,%xmm3
  1b7122:       66 0f 66 d3             pcmpgtd %xmm3,%xmm2
  1b7126:       66 0f db c2             pand   %xmm2,%xmm0
  1b712a:       66 0f d7 c8             pmovmskb %xmm0,%ecx
  1b712e:       85 c9                   test   %ecx,%ecx
  1b7130:       0f 85 55 ff ff ff       jne    1b708b <__tsan_read8+0x4b>
  1b7136:       48 83 c2 01             add    $0x1,%rdx
  1b713a:       41 bb 01 00 00 00       mov    $0x1,%r11d
  1b7140:       49 ba ff ff ff ff ff    movabs $0x3ffffffffff,%r10
  1b7147:       03 00 00 
  1b714a:       48 89 d1                mov    %rdx,%rcx
  1b714d:       49 21 d2                and    %rdx,%r10
  1b7150:       64 48 89 14 25 c0 73    mov    %rdx,%fs:0xfffffffffff973c0
  1b7157:       f9 ff 
  1b7159:       48 c1 e9 2a             shr    $0x2a,%rcx
  1b715d:       83 e1 07                and    $0x7,%ecx
  1b7160:       83 c1 0e                add    $0xe,%ecx
  1b7163:       49 d3 e3                shl    %cl,%r11
  1b7166:       49 83 eb 01             sub    $0x1,%r11
  1b716a:       4d 21 da                and    %r11,%r10
  1b716d:       41 f7 c2 ff 1f 00 00    test   $0x1fff,%r10d
  1b7174:       0f 84 1c 04 00 00       je     1b7596 <__tsan_read8+0x556>
  1b717a:       48 01 d2                add    %rdx,%rdx
  1b717d:       48 83 c6 01             add    $0x1,%rsi
  1b7181:       48 c1 ea 33             shr    $0x33,%rdx
  1b7185:       48 69 d2 00 00 13 01    imul   $0x1130000,%rdx,%rdx
  1b718c:       4a 8d 0c d2             lea    (%rdx,%r10,8),%rcx
  1b7190:       48 ba 00 00 00 00 00    movabs $0x600000000000,%rdx
  1b7197:       60 00 00 

...

 

You can see that the markers end up only around the very first fast path in the function, while the majority of the code is moved outside of the annotated region. As a result, IACA does not produce a useful result.

Any suggestions on how to avoid this effect?

 

 

1 post / 0 new
For more complete information about compiler optimizations, see our Optimization Notice.