CnC
 All Classes Namespaces Functions Variables Typedefs Enumerator Friends Groups Pages
default_tuner.h
1 // Copyright (c) 2007-2013 Intel Corporation. All Rights Reserved. **
2 // **
3 // The source code contained or described herein and all documents related to **
4 // the source code ("Material") are owned by Intel Corporation or its suppliers **
5 // or licensors. Title to the Material remains with Intel Corporation or its **
6 // suppliers and licensors. The Material contains trade secrets and proprietary **
7 // and confidential information of Intel or its suppliers and licensors. The **
8 // Material is protected by worldwide copyright and trade secret laws and **
9 // treaty provisions. No part of the Material may be used, copied, reproduced, **
10 // modified, published, uploaded, posted, transmitted, distributed, or **
11 // disclosed in any way without Intel's prior express written permission. **
12 // **
13 // No license under any patent, copyright, trade secret or other intellectual **
14 // property right is granted to or conferred upon you by disclosure or delivery **
15 // of the Materials, either expressly, by implication, inducement, estoppel or **
16 // otherwise. Any license under such intellectual property rights must be **
17 // express and approved by Intel in writing. **
18 //********************************************************************************
19 
20 #ifndef CNC_DEFAULT_TUNER_H_ALREADY_INCLUDED
21 #define CNC_DEFAULT_TUNER_H_ALREADY_INCLUDED
22 
23 #include <cnc/internal/cnc_api.h>
24 #include <cnc/default_partitioner.h>
25 #include <cnc/internal/step_delayer.h>
26 #include <cnc/internal/cnc_tag_hash_compare.h>
27 #include <cnc/internal/no_range.h>
28 #include <cnc/internal/no_tag_table.h>
29 #include <cnc/internal/hash_tag_table.h>
30 #include <cnc/internal/item_properties.h>
31 #include <cnc/internal/item_properties.h>
32 #include <cnc/internal/dist/distributor.h>
33 #include <tbb/atomic.h>
34 #include <tbb/concurrent_unordered_set.h>
35 //#include <tbb/concurrent_hash_map.h>
36 
37 namespace CnC {
38 
39  template< class T > class context;
40  namespace Internal {
41  template< class Tag, class Range, class StepColl, class RangeStepI, class TIR, bool deps > struct range_step;
42  }
43 
44  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
45  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
46 
47  // Key-words tuning
48  enum {
49  COMPUTE_ON_LOCAL = -2, ///< let tuner::compute_on return COMPUTE_ON_NO if the step should be executed locally
50  COMPUTE_ON_ROUND_ROBIN = -3, ///< let tuner::compute_on return COMPUTE_ON_ROUND_ROBIN to let the scheduler distribute it in a round-robin fashion
51  COMPUTE_ON_ALL = -4, ///< let tuner::compute_on return COMPUTE_ON_ALL if the step should be executed on all processes, as well as locally
52  COMPUTE_ON_ALL_OTHERS = -5, ///< let tuner::compute_on return COMPUTE_ON_ALL_OTHERS if the step should be executed on all processes, but not locally
53  PRODUCER_UNKNOWN = -6, ///< producer process of dependent item is unknown
54  PRODUCER_LOCAL = -7, ///< producer process of dependent item is local process
55  CONSUMER_UNKNOWN = -8, ///< consumer process of given item is unkown
56  CONSUMER_LOCAL = -9, ///< consumer process of given item is the local process
57  CONSUMER_ALL = -10, ///< all processes consume given item
58  CONSUMER_ALL_OTHERS = -11, ///< all processes but this consume given item
59  NO_GETCOUNT = Internal::item_properties::NO_GET_COUNT, ///< no get-count specified
60  AFFINITY_HERE = Internal::scheduler_i::AFFINITY_HERE ///< default affinity to current thread
61  };
62 
63  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
64  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
65 
66  /// Functionality that might be needed to implement all kinds of tuners.
67  /// Always try to use higher level tuners which derive from this.
68  /// Always use virtual inheritance (see higher leve tuners).
69  class tuner_base
70  {
71  public:
72  /// returns id/rank of calling process
73  /// defaults to 0 if running on one process only.
74  inline static int myPid()
75  {
77  }
78  /// return total number of processes participating in this programm execution
79  /// defaults to 1 if running on one process only.
80  inline static int numProcs()
81  {
83  }
84  /// returns number of threads used by scheduler in given context
85  template< typename Ctxt >
86  inline static int numThreads( const Ctxt & ctxt )
87  {
88  return ctxt.numThreads();
89  }
90  };
91 
92 
93  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
94  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
95 
96  /// \brief Default (NOP) implementations of the step_tuner interface.
97  ///
98  /// Also defines the interface a user-provided tuner must satisfy.
99  /// Derive your tuner from this (to avoid implementing the entire interface).
100  ///
101  /// It is recommended that your tuner does not implement the methods as templates.
102  /// Instead, you should use the actual types that it expects.
103  ///
104  /// \#include <cnc/default_tuner.h>
105  template< bool check_deps = true >
106  struct /*CNC_API*/ step_tuner : public virtual tuner_base
107  {
108  /// \brief Allows defnintion of priorites to individual steps (which are identified by the tag).
109  /// \return the default implementation always return 1.
110  /// \param tag the tag which identifies the step to be executed
111  /// \param arg the argument as passed to context< Derived >::prescribed (usually the context)
112  /// \see also CNCROOT/samples/floyd_warshall
113  template< typename Tag, typename Arg >
114  int priority( const Tag & tag, Arg & arg ) const
115  {
116  return 1;
117  }
118 
119  /// \brief Allows declaration of data dependencies (to items) of
120  /// given step (identified by the tag).
121  ///
122  /// When a step-instance is prescribed through a corresponding
123  /// tag_collection::put, this method will be called. You can
124  /// declare dependencies to items by calling dC.depends(
125  /// item_collection, dependent_item_tag ) for every item the step
126  /// is going to 'get' in its execute method. The actual step
127  /// execution will be delayed until all dependencies can be
128  /// satisfied. The default implementation does nothing (NOP). Your
129  /// own implementation must accept dC by reference (T&).
130  /// dC.depends accepts an additional optional argument to declare the
131  /// process that will produce the item. You can either pass the process-rank
132  /// or CnC::PRODUCER_UKNOWN or CnC::PRODUCER_LOCAL. This producer argument
133  /// will have effect only if the tuner of the respective item-collection
134  /// returns CnC::CONSUMER_UKNOWN, otherwise it will be ignored.
135  /// \param tag the tag which identifies the step to be executed.
136  /// \param arg the argument as passed to context< Derived >::prescribed
137  /// (usually the context)
138  /// \param dC opaque object (must be by reference!) providing method depends
139  /// to declare item dependencies
140  template< typename Tag, typename Arg, typename T >
141  void depends( const Tag & tag, Arg & arg, T & dC ) const
142  {
143  }
144 
145  /// \brief Returns whether the step should be pre-scheduled
146  ///
147  /// Pre-scheduling provides an alternative method for detecting
148  /// data dependencies.
149  ///
150  /// The step instance will be run immediately when prescribed
151  /// by a tag_collection::put. All items that are not yet available
152  /// when accessed by the non-blocking get method will automatically
153  /// be treated as dependent items. The pre-run will end at
154  /// context::flush_gets() if any items are unavailable. The step
155  /// execution will be delayed until all detected dependencies can
156  /// be satisfied.
157  bool preschedule() const
158  {
159  return false;
160  }
161 
162  /// \brief Tell the scheduler the preferred thread for executing given step
163  ///
164  /// Not all schedulers might actually evaluate this call (see \ref scheduler);
165  /// it involves a virtual function call whenever a step is (re-)scheduled.
166  /// This feature is most useful in combination with
167  /// the CNC_PIN_THREADS environment variable (\ref priorpin).
168  /// \return thread id or AFFINITY_HERE (default)
169  template< typename Tag, typename Arg >
170  int affinity( const Tag & /*tag*/, Arg & /*arg*/ ) const
171  {
172  return AFFINITY_HERE;
173  }
174 
175  /// \brief tell the scheduler on which process to run the step (or range of steps) (distCnC)
176  /// \return process id where the step will be executed, or COMPUTE_ON_ROUND_ROBIN, or COMPUTE_ON_LOCAL, or COMPUTE_ON_ALL, or COMPUTE_ON_ALL_OTHERS
177  template< typename Tag, typename Arg >
178  int compute_on( const Tag & /*tag*/, Arg & /*arg*/ ) const
179  {
180  return COMPUTE_ON_ROUND_ROBIN;
181  }
182 
183  /// true if steps launched through ranges consume items or need global locking, false otherwise.
184  /// Avoiding checks for dependencies and global locks saves overhead and will perform better (e.g. for parallel_for).
185  /// Safe execution (with checks) is the default (check_deps template argument).
186  static const bool check_deps_in_ranges = check_deps;
187 
188  /// \brief check for cancelation of given step
189  ///
190  /// \return true if step was canceled, false otherwise (default)
191  /// \note Must be thread-safe.
192  /// Runtime will try to not execute the step, but it might still get executed.
193  /// Best effort - but no guarantees.
194  /// Canceling steps makes all determinism guarantees void.
195  ///
196  /// For distributed memory your implementation might require to sync its state
197  /// across processes. Currently there is no API exposed to do that conveniently.
198  /// However, an example implementatino CnC::cancel_tuner is provided which
199  /// works on distributed memory.
200  /// \see also CNCROOT/samples/floyd_warshall
201  template< typename Tag, typename Arg >
202  int was_canceled( const Tag & /*tag*/, Arg & /*arg*/ ) const
203  {
204  return false;
205  }
206  };
207 
208  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
209 
210  /// \brief Default (NOP) implementations of the pfor_tuner interface.
211  ///
212  /// Also defines the interface a user-provided step-tuner must satisfy.
213  /// Derive your tuner from this (to avoid implementing the entire interface).
214  ///
215  /// It is recommended that your tuner does not implement the methods as templates.
216  /// Instead, you should use the actual types that it expects.
217  ///
218  /// \#include <cnc/default_tuner.h>
219  template< bool check_deps = true, typename Partitioner = default_partitioner<> >
220  struct /*CNC_API*/ pfor_tuner : public virtual tuner_base
221  {
222  template< typename Tag, typename Arg >
223  int priority( const Tag & tag, Arg & arg ) const
224  {
225  return 1;
226  }
227 
228  template< typename Tag, typename Arg, typename T >
229  void depends( const Tag & tag, Arg & arg, T & dC ) const
230  {
231  }
232 
233  bool preschedule() const
234  {
235  return false;
236  }
237 
238  template< typename Tag, typename Arg >
239  int affinity( const Tag & /*tag*/, Arg & /*arg*/ ) const
240  {
241  return AFFINITY_HERE;
242  }
243 
244  static const bool check_deps_in_ranges = check_deps;
245 
246  typedef Partitioner partitioner_type;
247 
248  partitioner_type partitioner() const
249  {
250  return typename pfor_tuner< check_deps, Partitioner >::partitioner_type();
251  }
252  };
253 
254  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
255  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
256 
257  namespace CT {
258 #ifdef _DIST_CNC_
259  static const char SINGLE = 0; // single tag
260  static const char ALL = 1; // cancel all
261  static const char RESET = 2; // reset
262 #endif
263  }
264 
265  /// \brief Step tuner with convenient cancelation capabilities
266  ///
267  /// Allows cancelation of individual step-instances by their tags as well as
268  /// canceling all instances at once. All cancelation requests are "active" until
269  /// unsafe_reset() is called (or the tuner is destructed).
270  ///
271  /// To use it, you need a cancel_tuner object in your context which you pass
272  /// to the constructor of the respective step_collection.
273  ///
274  /// It works on distributed memory but might perform poorly if used frequently.
275  ///
276  /// \param Tag tag-type
277  /// \param check_deps if false, avoid some mechanics to handle unavailable items
278  /// \param Hasher hash-functor for Tag, defaults to tbb::tbb_hash< Tag >
279  /// \param Equality equality operator for Tag, defaults to std::equal_to< Tag >
280  /// \note It is assumed that cancelation per instance happens relatively rarely. Hence
281  /// no automatic garbage collection of the tags is provided. If you cancel individual
282  /// step-instances frequently, it is recommended to prune the internal data structure
283  /// from time to time in a safe state through unsafe_reset().
284  /// \see also CNCROOT/samples/floyd_warshall
285  template< typename Tag, bool check_deps = true,
286  typename Hasher = cnc_hash< Tag >, typename Equality = cnc_equal< Tag > >
287  class cancel_tuner : public step_tuner< check_deps >, public Internal::distributable
288  {
289  public:
290  template< typename C >
291  cancel_tuner( C & ctxt )
292  : Internal::distributable( "cancel_tuner" ), m_context( ctxt ), m_canceledTags(), m_cancelAll()
293  {
294  m_context.subscribe( this );
295  m_cancelAll = false;
296  }
297 
298  ~cancel_tuner()
299  {
300  m_context.unsubscribe( this );
301  }
302 
303  /// \brief cancel given step (identified by tag)
304  void cancel( const Tag & t, bool from_msg = false )
305  {
306  if( ! m_cancelAll ) {
307 #ifdef _DIST_CNC_
308  if( !from_msg && Internal::distributor::numProcs() > 1 ) {
309  serializer * _ser = m_context.new_serializer( this );
310  (*_ser) & CT::SINGLE & t;
311  m_context.bcast_msg( _ser );
312  }
313 #endif
314  m_canceledTags.insert( t );
315  }
316  }
317 
318  /// \brief cancel all steps
319  void cancel_all( bool from_msg = false )
320  {
321 #ifdef _DIST_CNC_
322  if( !from_msg && Internal::distributor::numProcs() > 1 ) {
323  serializer * _ser = m_context.new_serializer( this );
324  (*_ser) & CT::ALL;
325  m_context.bcast_msg( _ser );
326  }
327 #endif
328  m_cancelAll = true;
329  }
330 
331  void unsafe_reset( )
332  {
333  unsafe_reset( true );
334  }
335 
336  /// \brief implements/overwrites step_tuner::was_canceled(...)
337  template< typename Arg >
338  int was_canceled( const Tag & tag, Arg & /*arg*/ ) const
339  {
340  return m_cancelAll == true || m_canceledTags.count( tag ) > 0;
341  }
342 
343  // from distributable
344  virtual void recv_msg( serializer * ser )
345  {
346 #ifdef _DIST_CNC_
347  CNC_ASSERT( Internal::distributor::active() );
348  char _msg;
349  (*ser) & _msg;
350  switch( _msg ) {
351  case CT::SINGLE :
352  Tag _tag;
353  (*ser) & _tag;
354  this->cancel( _tag, true );
355  break;
356  case CT::ALL :
357  this->cancel_all( true );
358  break;
359  case CT::RESET :
360  this->unsafe_reset( false );
361  break;
362  default:
363  CNC_ABORT( "Unexpected message received (cancel_tuner)." );
364  }
365 #endif
366  }
367 
368  private:
369  /// \brief reset all current cancel states
370  /// \note not thread-safe, to be called in safe state only
371  /// (between program start or calling context::wait() and putting the first tag or item).
372  virtual void unsafe_reset( bool dist )
373  {
374 #ifdef _DIST_CNC_
375  if( dist && Internal::distributor::numProcs() > 1 ) {
376  serializer * _ser = m_context.new_serializer( this );
377  (*_ser) & CT::RESET;
378  m_context.bcast_msg( _ser );
379  }
380 #endif
381  m_canceledTags.clear();
382  m_cancelAll = false;
383  }
384 
385  Internal::distributable_context & m_context;
386  tbb::concurrent_unordered_set< Tag, Hasher, Equality > m_canceledTags;
387  tbb::atomic< bool > m_cancelAll;
388  };
389 
390 
391  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
392  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
393 
394  /// \brief Default implementations of the item-tuner interface for item-collections
395  ///
396  /// Usually you will not need to use this directly for anything else than documentation and interface.
397  template< template< typename T, typename I > class TT >
398  struct item_tuner : public virtual tuner_base
399  {
400  /// \brief Defines the type of the internal data store.
401  ///
402  /// It forwards the functionality of the template parameter template.
403  /// The expected interface of the template is not yet exposed.
404  /// Use hashmap_tuner or vector_tuner to derive you own tuner.
405  template< typename Tag, typename Item >
406  struct table_type : public TT< Tag, Item >
407  {
408  table_type( size_t sz = 0 ) : TT< Tag, Item >( sz ) {}
409  };
410 
411  /// \brief Allows specifying the number of gets to the given item.
412  ///
413  /// After get_count() many 'get()'s the item can be removed from the collection.
414  /// By default, the item is not removed until the collection is deleted.
415  /// \param tag the tag which identifies the item
416  /// \return number of expected gets to this item, or CNC::NO_GETCOUNT
417  template< typename Tag >
418  int get_count( const Tag & tag ) const
419  {
420  return NO_GETCOUNT;
421  }
422 
423  /// \brief Tells the scheduler on which process(es) this item is going to be consumed
424  /// return process id where the item will be consumed (get), or CONSUMER_UNKNOWN (default)
425  /// or std::vector<int>, containing all ids of consuming processes.
426  /// To indicate that the consumer processes are unknown, return an empty vector.
427  /// The vector must contain special values like CONSUMER_LOCAL.
428  /// If not CnC::CONSUMER_UKNOWN (or empty vector), this declaration will overwrite what
429  /// the step-tuner might declare in depends.
430  /// Providing this method leads to the most efficient communication pattern for
431  /// to getting the data to were it is needed.
432  /// \param tag the tag which identifies the item
433  template< typename Tag >
434  int consumed_on( const Tag & tag ) const
435  {
436  return CONSUMER_UNKNOWN;
437  }
438 
439  /// \brief Tells the scheduler on which process(es) this item is going to be produced
440  /// return process id where the item will be produced. If unknown return CnC::PRODUCER_UNKNOWN.
441  /// return PRODUCER_LOCAL if local process is the owner.
442  /// Implementing this method reduces the communication cost for locating and sending data.
443  /// Implementing item_tuner::consumed_on is more efficient, but might be more complicated.
444  /// Will be evaluated only if item_tuner::consumed_on returns CnC::CONSUMER_UNKNOWN
445  /// \param tag the tag which identifies the item
446  template< typename Tag >
447  int produced_on( const Tag & tag ) const
448  {
449  return PRODUCER_UNKNOWN;
450  }
451  };
452 
453  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
454 
455  namespace Internal {
456  template< class Tag, class ItemT > class hash_item_table;
457  template< class Tag, class ItemT > class vec_item_table;
458  }
459 
460  /// \brief The tuner base for hashmap-based item-tuners.
461  ///
462  /// The internal hash-map uses cnc_tag_hash_compare. If your
463  /// tag-type is not supported by default, you need to provide a
464  /// template specialization for it.
465  struct hashmap_tuner : public item_tuner< Internal::hash_item_table >
466  {
467  };
468 
469  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
470 
471  /// \brief The tuner base for vector-based item-tuners.
472  ///
473  /// Your tags must be convertable to and from size_t. You must
474  /// provide the maximum value before accessing the collection
475  /// (constructor or set_max). The runtime will allocate as many
476  /// slots. Hence, use this only if your tags-space is dense,
477  /// without a large offset and if it is not too large.
478  struct vector_tuner : public item_tuner< Internal::vec_item_table >
479  {
480  };
481 
482  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
483  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
484 
485  /// Default implementations of the tag-tuner interface for tag-collections
486  /// Use this if you are going put ranges. Optional argument is a custom partitioner.
487  /// Ranges don't work with memoization (yet)
488  template< typename Range = Internal::no_range, typename Partitioner = default_partitioner<> >
489  struct tag_tuner : public virtual tuner_base
490  {
491  /// A tag tuner must provide the type of the range, default is no range
492  typedef Range range_type;
493  /// A tag tuner must provide a tag-table type; default is no tag-table
494  typedef Internal::no_tag_table tag_table_type;
495  /// \brief The type of the partitioner
496  typedef Partitioner partitioner_type;
497 
498  /// \brief return a partitioner for range-based features, such as parallel_for
499  /// \see default_partitioner for the expected signature of partitioners
500  /// overwrite partitioner() if it doesn't come with default-constructor or
501  /// if the default constructor is insufficient.
503  {
505  }
506 
507  /// return true if tag memoization is wanted; returns false by default (with no_tag_table)
508  bool preserve_tags() const
509  {
510  return false;
511  };
512  };
513 
514  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
515 
516  /// Use this if your tag-collection should preserve tags (memoization)
517  /// Memoization doesn't work with ranges (yet)
518  template< typename Tag, typename H = cnc_hash< Tag >, typename E = cnc_equal< Tag > >
519  struct preserve_tuner : public tag_tuner< Internal::no_range, default_partitioner<> >
520  {
521  /// A tag tuner must provide a tag-table type; default is no tag-table
522  typedef Internal::hash_tag_table< Tag, H, E > tag_table_type;
523 
524  bool preserve_tags() const
525  {
526  return true;
527  };
528  };
529 
530  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
531  // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
532 
533  namespace Internal {
534  template< typename Tuner >
535  const Tuner & get_default_tuner()
536  {
537  static tbb::atomic< Tuner * > s_tuner;
538  if( s_tuner == NULL ) {
539  Tuner * _tmp = new Tuner;
540  if( s_tuner.compare_and_swap( _tmp, NULL ) != NULL ) delete _tmp;
541  }
542  return *s_tuner;
543  }
544  }
545 
546 } // end namespace CnC
547 
548 #endif //CNC_DEFAULT_TUNER_H_ALREADY_INCLUDED