BVH4 traverser "optimization"

BVH4 traverser "optimization"

I've noticed an unnecessary "else" for leaf nodes. This is at bvh4_traverser.cpp:122. Interestingly, Composer 2011 on Windows seems to have a problem optimizing that. I get a 2-3% speed increase on my i7-920 after removing that "else". I tried something similar for the occlusion rays, but it doesn't seem to make a difference:

/*! Stack-based traversal loop for an occlusion query. Each iteration pops
 *  one node reference; inner nodes are tested against the ray four boxes
 *  at a time, leaf nodes have their triangles tested and the enclosing
 *  function returns true on the first occluding triangle. The loop ends
 *  when the stack is empty.
 *
 *  Every path through the inner-node block leaves it via `continue` or
 *  `goto next`, so the leaf block needs no `else`: it is reached only
 *  when the inner-node test fails. */
while (true) {

  /*! stack empty: nothing left to traverse (hinted as the rare case) */
  if (__builtin_expect(stackPtr == 0, false)) break;

  /*! pop the next node reference */
  stackPtr--;
  cur = stack[stackPtr];

next:

  /*! this is an inner node */
  if (__builtin_expect(cur >= 0, true))
  {
    /*! single ray intersection with 4 boxes */
    const BVH4::Node& node = bvh->node(nodes,cur);

    /*! entry distances per axis; the bounds are read straight from the
     *  node memory at byte offset offsetFactor*cur plus a per-axis offset */
    ssef tNearX = (norg.x + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearX)) * rdir.x;
    ssef tNearY = (norg.y + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearY)) * rdir.y;
    ssef tNearZ = (norg.z + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearZ)) * rdir.z;
    ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);

    /*! exit distances per axis, read the same way */
    ssef tFarX = (norg.x + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farX)) * rdir.x;
    ssef tFarY = (norg.y + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farY)) * rdir.y;
    ssef tFarZ = (norg.z + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farZ)) * rdir.z;
    ssef tFar = min(tFarX,tFarY,tFarZ,rayFar);

    /*! one mask bit per box whose entry distance does not exceed its exit distance */
    size_t _hit = movemask(tNear <= tFar);

    /*! push hit nodes onto stack */

    /*! no box hit: go back and pop the next entry */
    if (__builtin_expect(_hit == 0, true)) continue;

    /*! First hit child: written into the free slot at stackPtr without
     *  advancing stackPtr, so it is not yet a live stack entry. If it is
     *  the only hit it is traversed directly through `cur`. */
    size_t r = __bsf(_hit); _hit = __btc(_hit,r);
    stack[stackPtr] = cur = node.child[r];
    if (__builtin_expect(_hit == 0, true)) goto next;

    /*! Each further hit child pre-increments stackPtr, which turns the
     *  previously written child into a live stack entry; the newest child
     *  becomes `cur` and is the one traversed next. */
    r = __bsf(_hit); _hit = __btc(_hit,r);
    stack[++stackPtr] = cur = node.child[r];
    if (__builtin_expect(_hit == 0, true)) goto next;

    r = __bsf(_hit); _hit = __btc(_hit,r);
    stack[++stackPtr] = cur = node.child[r];
    if (__builtin_expect(_hit == 0, true)) goto next;

    /*! fourth and last possible hit */
    r = __bsf(_hit); _hit = __btc(_hit,r);
    stack[++stackPtr] = cur = node.child[r];
    goto next;
  }

  /*! this is a leaf node */
  {
    /*! flip bit 31 of the reference, then split the remainder into an
     *  offset (upper bits) and a count (low 5 bits) */
    cur ^= 0x80000000;
    const size_t ofs = size_t(cur) >> 5;
    const size_t num = size_t(cur) & 0x1F;

    /*! NOTE(review): the loop header and the `if (bvh->` prefix were
     *  reconstructed from a garbled paste (the text between '<' and '->'
     *  was lost) -- confirm against the original bvh4_traverser.cpp. */
    for (size_t i=ofs; i<ofs+num; i++)
      if (bvh->triangles[i].occluded(ray))
        return true;
  }
}

1 post / novo 0
Para obter mais informações sobre otimizações de compiladores, consulte Aviso sobre otimizações.