//! Represents the state of a particular generator typedef struct{ uint x; uint c; uint MWC64X_A; } mwc64x_state_t; enum{ MWC64X_M = 18446383549859758079UL }; // Pre: a=M)||(v>1; } return r; } // Pre: a=0 // Post: r=(a^b) mod M // This takes at most ~64^2 modular additions, so probably about 2^15 or so instructions on // most architectures ulong MWC_PowMod64(ulong a, ulong e, ulong M) { ulong sqr=a, acc=1; while(e!=0){ if(e&1) acc=MWC_MulMod64(acc,sqr,M); sqr=MWC_MulMod64(sqr,sqr,M); e=e>>1; } return acc; } void MWC64X_Step(mwc64x_state_t *s) { uint X=s->x, C=s->c; uint Xn=s->MWC64X_A*X+C; uint carry=(uint)(XnMWC64X_A,X,carry); s->x=Xn; s->c=Cn; } uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance) { ulong m=MWC_PowMod64(A, distance, M); ulong x=curr.x*(ulong)A+curr.y; x=MWC_MulMod64(x, m, M); return (uint2)((uint)(x/A), (uint)(x%A)); } uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap) { // This is an arbitrary constant for starting LCG jumping from. I didn't // want to start from 1, as then you end up with the two or three first values // being a bit poor in ones - once you've decided that, one constant is as // good as any another. There is no deep mathematical reason for it, I just // generated a random number. enum{ MWC_BASEID = 4077358422479273989UL }; ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap; ulong m=MWC_PowMod64(A, dist, M); ulong x=MWC_MulMod64(MWC_BASEID, m, M); return (uint2)((uint)(x/A), (uint)(x%A)); } void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset, uint bugfix) { uint2 tmp=MWC_SeedImpl_Mod64(s->MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset); s->x=tmp.x; s->c=tmp.y; s->MWC64X_A=bugfix; } void MWC64X_Skip(mwc64x_state_t *s, ulong distance) { uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->MWC64X_A, MWC64X_M, distance); s->x=tmp.x; s->c=tmp.y; } // Return a float uniformly distributed in [0,1[ float MWC64X_NextFloat(mwc64x_state_t *s) { uint res=s->x ^ s->c; MWC64X_Step(s); return ((float)res)/65536.0f/65536.0f; } // Returns a 32-bit integer uniformly distributed in [0..2^32) uint MWC64X_NextUint(mwc64x_state_t *s) { uint res=s->x ^ s->c; MWC64X_Step(s); return res; } __kernel void generateAWGNandDecodeLLRglobalMem(__global float *L) { mwc64x_state_t rng; int i = get_global_id(0); ulong baseOffset = i+100; MWC64X_SeedStreams(&rng, baseOffset, 4, 4294883355); L[i]=MWC64X_NextFloat(&rng); }