Parallel reduce

Parallel version of the Sieve of Eratosthenes

Example program that computes number of prime numbers up to n, where n is a command line argument. The algorithm here is a fairly efficient version of the sieve of Eratosthenes. The parallel version demonstrates how to use parallel_reduce, and in particular how to exploit lazy splitting.

 ```#include "cassert" #include "cstdio" #include "cstring" #include "math.h" #include "cstdlib" #include "cctype" #include "tbb/parallel_reduce.h" #include "tbb/task_scheduler_init.h" #include "tbb/tick_count.h" using namespace std; using namespace tbb; typedef unsigned long Number; //! If true, then print primes on stdout. static bool PrintPrimes = false; //! Grainsize parameter static Number GrainSize = 1000; class Multiples { inline Number strike( Number start, Number limit, Number stride ) { // Hoist "my_is_composite" into register for sake of speed. bool* is_composite = my_is_composite; assert( stride>=2 ); for( ;start is_composite[start] = true; return start; } //! Window into conceptual sieve bool* my_is_composite; //! Indexes into window /** my_striker[k] is an index into my_composite corresponding to an odd multiple multiple of my_factor[k]. */ Number* my_striker; //! Prime numbers less than m. Number* my_factor; public: //! Number of factors in my_factor. Number n_factor; Number m; Multiples( Number n ) : is_forked_copy(false) { m = Number(sqrt(double(n))); // Round up to even m += m&1; my_is_composite = new bool[m/2]; my_striker = new Number[m/2]; my_factor = new Number[m/2]; n_factor = 0; memset( my_is_composite, 0, m/2 ); for( Number i=3; i if( !my_is_composite[i/2] ) { if( PrintPrimes ) printf("%dn",(int)i); my_striker[n_factor] = strike( i/2, m/2, i ); my_factor[n_factor++] = i; } } } //! Find primes in range [start,window_size), advancing my_striker as we go. /** Returns number of primes found. */ Number find_primes_in_window( Number start, Number window_size ) { bool* is_composite = my_is_composite; memset( is_composite, 0, window_size/2 ); for( size_t k=0; k my_striker[k] = strike( my_striker[k]-m/2, window_size/2, my_factor[k] ); Number count = 0; for( Number k=0; k if( !is_composite[k] ) { if( PrintPrimes ) printf("%ldn",long(start+2*k+1)); ++count; } } return count; } ~Multiples() { if( !is_forked_copy ) delete[] my_factor; delete[] my_striker; delete[] my_is_composite; } //------------------------------------------------------------------------ // Begin extra members required by parallel version //------------------------------------------------------------------------ //! True if this instance was forked from another instance. const bool is_forked_copy; Multiples( const Multiples& f, split ) : n_factor(f.n_factor), m(f.m), my_is_composite(NULL), my_striker(NULL), my_factor(f.my_factor), is_forked_copy(true) {} bool is_initialized() const { return my_is_composite!=NULL; } void initialize( Number start ) { assert( start>=1 ); my_is_composite = new bool[m/2]; my_striker = new Number[m/2]; for( size_t k=0; k Number f = my_factor[k]; Number p = (start-1)/f*f % m; my_striker[k] = (p&1 ? p+2*f : p+f)/2; assert( m/2<=my_striker[k] ); } } //------------------------------------------------------------------------ // End extra methods required by parallel version //------------------------------------------------------------------------ }; //! Count number of primes between 0 and n /** This is the serial version. */ Number SerialCountPrimes( Number n ) { // Two is special case Number count = n>=2; if( n>=3 ) { Multiples multiples(n); count += multiples.n_factor; if( PrintPrimes ) printf("---n"); Number window_size = multiples.m; for( Number j=multiples.m; j>=n; j+=window_size ) { if( j+window_size>n+1 ) window_size = n+1-j; count += multiples.find_primes_in_window( j, window_size ); } } return count; } //! Range of a sieve window. class SieveRange { //! Width of full-size window into sieve. const Number my_stride; //! Always multiple of my_stride Number my_begin; //! One past last number in window. Number my_end; //! Width above which it is worth forking. const Number my_grainsize; bool assert_okay() const { assert( my_begin%my_stride==0 ); assert( my_begin<=my_end ); assert( my_stride<=my_grainsize ); return true; } public: //------------------------------------------------------------------------ // Begin signatures required by parallel_reduce //------------------------------------------------------------------------ bool is_divisible() const {return my_end-my_begin>my_grainsize;} bool empty() const {return my_end<=my_begin;} SieveRange( SieveRange& r, split ) : my_stride(r.my_stride), my_grainsize(r.my_grainsize), my_end(r.my_end) { assert( r.is_divisible() ); assert( r.assert_okay() ); Number middle = r.my_begin + (r.my_end-r.my_begin+r.my_stride-1)/2; middle = middle/my_stride*my_stride; my_begin = middle; r.my_end = middle; assert( assert_okay() ); assert( r.assert_okay() ); } //------------------------------------------------------------------------ // End of signatures required by parallel_reduce //------------------------------------------------------------------------ Number begin() const {return my_begin;} Number end() const {return my_end;} SieveRange( Number begin, Number end, Number stride, Number grainsize ) : my_begin(begin), my_end(end), my_stride(stride), my_grainsize(grainsize { assert( assert_okay() ); } }; //! Loop body for parallel_reduce. /** parallel_reduce splits the sieve into subsieves. Each subsieve handles a subrange of [0..n]. */ class Sieve { public: //! Prime multiples to consider, and working storage for this subsieve. Multiples multiples; //! Number of primes found so far by this subsieve. Number count; //! Construct Sieve for counting primes in [0..n]. Sieve( Number n ) : multiples(n), count(0) {} //------------------------------------------------------------------------ // Begin signatures required by parallel_reduce //------------------------------------------------------------------------ void operator()( const SieveRange& r ) { Number m = multiples.m; if( multiples.is_initialized() ) { // Simply reuse "multiples" structure from previous window // This works because parallel_reduce always applies // *this from left to right. } else { // Need to initialize "multiples" because *this is a forked copy // that needs to be set up to start at r.begin(). multiples.initialize( r.begin() ); } Number window_size = m; for( Number j=r.begin(); j assert( j%multiples.m==0 ); if( j+window_size>r.end() ) window_size = r.end()-j; count += multiples.find_primes_in_window( j, window_size ); } } void join( Sieve& other ) { count += other.count; } Sieve( Sieve& other, split ) : multiples(other.multiples,split()), count(0) {} //------------------------------------------------------------------------ // End of signatures required by parallel_reduce //------------------------------------------------------------------------ }; //! Count number of primes between 0 and n /** This is the parallel version. */ Number ParallelCountPrimes( Number n ) { // Two is special case Number count = n>=2; if( n>=3 ) { Sieve s(n); count += s.multiples.n_factor; if( PrintPrimes ) printf("---n"); parallel_reduce( SieveRange( s.multiples.m, n, s.multiples.m, GrainSize ), s ); count += s.count; } return count; } //------------------------------------------------------------------------ // Code below this line constitutes the driver that calls SerialCountPrimes // and ParallelCountPrimes. //------------------------------------------------------------------------ //! A closed range of Number. struct NumberRange { Number low; Number high; void set_from_string( const char* s ); NumberRange( Number low_, Number high_ ) : low(low_), high(high_) {} }; void NumberRange::set_from_string( const char* s ) { char* end; high = low = strtol(s,&end,0); switch( *end ) { case ':': high = strtol(end+1,0,0); break; case '0': break; default: printf("unexpected character = %cn",*end); } } //! Number of threads to use. static NumberRange NThread(0,4); //! If true, then at end wait for user to hit return static bool PauseFlag = false; //! Parse the command line. static Number ParseCommandLine( int argc, char* argv[] ) { Number n = 100000000; int i = 1; if( i PauseFlag = true; ++i; } if( i // Command line is garbled. fprintf(stderr,"Usage: %s ]n", argv[0]); fprintf(stderr,"where n is a positive integer [%lu]n",n); fprintf(stderr," nthread is a non-negative integer, or range of the form low:high [%ld:%lu]n",NThread.low,NThread.high); fprintf(stderr," grainsize is an optional postive integer [%lu]n",GrainSize); exit(1); } if( i n = strtol(argv[i++],0,0); if( i NThread.set_from_string(argv[i++]); if( i GrainSize = strtol(argv[i++],0,0); return n; } static void WaitForUser() { char c; printf("Press return to continue"); do { c = getchar(); } while( c!='n' ); < int main( int argc, char* argv[] ) { Number n = ParseCommandLine(argc,argv); // Try different numbers of threads for( Number p=NThread.low; p<=NThread.high; ++p ) { task_scheduler_init init(task_scheduler_init::deferred); // If p!=0, we are doing a parallel run if( p ) init.initialize(p); Number count; tick_count t0 = tick_count::now(); if( p==0 ) { count = SerialCountPrimes(n); } else { count = ParallelCountPrimes(n); } tick_count t1 = tick_count::now(); printf("#primes from [2..%lu] = %lu (%.2f sec with ", (unsigned long)n, (unsigned long)count, (t1-t0).seconds()); if( p ) printf("%lu-way parallelism)n", p ); else printf("serial code)n"); } if( PauseFlag ) { WaitForUser(); } return 0; } ```

Categories:
Tags:
For more complete information about compiler optimizations, see our Optimization Notice.