STXXL  1.4-dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pq_mergers.h
Go to the documentation of this file.
1 /***************************************************************************
2  * include/stxxl/bits/containers/pq_mergers.h
3  *
4  * Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  * Copyright (C) 1999 Peter Sanders <[email protected]>
7  * Copyright (C) 2003, 2004, 2007 Roman Dementiev <[email protected]>
8  * Copyright (C) 2007-2009 Johannes Singler <[email protected]>
9  * Copyright (C) 2007, 2008 Andreas Beckmann <[email protected]>
10  *
11  * Distributed under the Boost Software License, Version 1.0.
12  * (See accompanying file LICENSE_1_0.txt or copy at
13  * http://www.boost.org/LICENSE_1_0.txt)
14  **************************************************************************/
15 
16 #ifndef STXXL_CONTAINERS_PQ_MERGERS_HEADER
17 #define STXXL_CONTAINERS_PQ_MERGERS_HEADER
18 
20 
22 
23 //! \addtogroup stlcontinternals
24 //!
25 //! \{
26 
27 /*! \internal
28  */
29 namespace priority_queue_local {
30 
31 ////////////////////////////////////////////////////////////////////////////////
32 // auxiliary functions
33 
34 // merge length elements from the two sentinel terminated input
35 // sequences source0 and source1 to target
36 // advance source0 and source1 accordingly
37 // require: at least length nonsentinel elements available in source0, source1
38 // require: target may overwrite one of the sources as long as
39 // *(sourcex + length) is before the end of sourcex
40 template <class InputIterator, class OutputIterator, class CompareType>
42  InputIterator& source0,
43  InputIterator& source1,
44  OutputIterator target, OutputIterator end,
45  CompareType& cmp)
46 {
47  while (target != end)
48  {
49  if (cmp(*source0, *source1))
50  {
51  *target = *source1;
52  ++source1;
53  }
54  else
55  {
56  *target = *source0;
57  ++source0;
58  }
59  ++target;
60  }
61 }
62 
63 // merge length elements from the three sentinel terminated input
64 // sequences source0, source1 and source2 to target
65 // advance source0, source1 and source2 accordingly
66 // require: at least length nonsentinel elements available in source0, source1 and source2
67 // require: target may overwrite one of the sources as long as
68 // *(sourcex + length) is before the end of sourcex
69 template <class InputIterator, class OutputIterator,
70  class CompareType>
72  InputIterator& source0,
73  InputIterator& source1,
74  InputIterator& source2,
75  OutputIterator target, OutputIterator end,
76  CompareType& cmp)
77 {
78  if (cmp(*source1, *source0)) {
79  if (cmp(*source2, *source1)) {
80  goto s012;
81  }
82  else {
83  if (cmp(*source0, *source2)) {
84  goto s201;
85  }
86  else {
87  goto s021;
88  }
89  }
90  }
91  else {
92  if (cmp(*source2, *source1)) {
93  if (cmp(*source2, *source0)) {
94  goto s102;
95  }
96  else {
97  goto s120;
98  }
99  }
100  else {
101  goto s210;
102  }
103  }
104 
105 #define Merge3Case(a, b, c) \
106  s ## a ## b ## c: \
107  if (target == end) \
108  return; \
109  *target = *source ## a; \
110  ++target; \
111  ++source ## a; \
112  if (cmp(*source ## b, *source ## a)) \
113  goto s ## a ## b ## c; \
114  if (cmp(*source ## c, *source ## a)) \
115  goto s ## b ## a ## c; \
116  goto s ## b ## c ## a;
117 
118  // the order is chosen in such a way that
119  // four of the trailing gotos can be eliminated by the optimizer
120  Merge3Case(0, 1, 2);
121  Merge3Case(1, 2, 0);
122  Merge3Case(2, 0, 1);
123  Merge3Case(1, 0, 2);
124  Merge3Case(0, 2, 1);
125  Merge3Case(2, 1, 0);
126 
127 #undef Merge3Case
128 }
129 
130 // merge length elements from the four sentinel terminated input
131 // sequences source0, source1, source2 and source3 to target
132 // advance source0, source1, source2 and source3 accordingly
133 // require: at least length nonsentinel elements available in source0, source1, source2 and source3
134 // require: target may overwrite one of the sources as long as
135 // *(sourcex + length) is before the end of sourcex
136 template <class InputIterator, class OutputIterator,
137  class CompareType>
139  InputIterator& source0,
140  InputIterator& source1,
141  InputIterator& source2,
142  InputIterator& source3,
143  OutputIterator target, OutputIterator end,
144  CompareType& cmp)
145 {
146 #define StartMerge4(a, b, c, d) \
147  if ((!cmp(*source ## a, *source ## b)) && \
148  (!cmp(*source ## b, *source ## c)) && \
149  (!cmp(*source ## c, *source ## d))) \
150  goto s ## a ## b ## c ## d;
151 
152  // b>a c>b d>c
153  // a<b b<c c<d
154  // a<=b b<=c c<=d
155  // !(a>b) !(b>c) !(c>d)
156 
157  StartMerge4(0, 1, 2, 3);
158  StartMerge4(1, 2, 3, 0);
159  StartMerge4(2, 3, 0, 1);
160  StartMerge4(3, 0, 1, 2);
161 
162  StartMerge4(0, 3, 1, 2);
163  StartMerge4(3, 1, 2, 0);
164  StartMerge4(1, 2, 0, 3);
165  StartMerge4(2, 0, 3, 1);
166 
167  StartMerge4(0, 2, 3, 1);
168  StartMerge4(2, 3, 1, 0);
169  StartMerge4(3, 1, 0, 2);
170  StartMerge4(1, 0, 2, 3);
171 
172  StartMerge4(2, 0, 1, 3);
173  StartMerge4(0, 1, 3, 2);
174  StartMerge4(1, 3, 2, 0);
175  StartMerge4(3, 2, 0, 1);
176 
177  StartMerge4(3, 0, 2, 1);
178  StartMerge4(0, 2, 1, 3);
179  StartMerge4(2, 1, 3, 0);
180  StartMerge4(1, 3, 0, 2);
181 
182  StartMerge4(1, 0, 3, 2);
183  StartMerge4(0, 3, 2, 1);
184  StartMerge4(3, 2, 1, 0);
185  StartMerge4(2, 1, 0, 3);
186 
187 #define Merge4Case(a, b, c, d) \
188  s ## a ## b ## c ## d: \
189  if (target == end) \
190  return; \
191  *target = *source ## a; \
192  ++target; \
193  ++source ## a; \
194  if (cmp(*source ## c, *source ## a)) \
195  { \
196  if (cmp(*source ## b, *source ## a)) \
197  goto s ## a ## b ## c ## d; \
198  else \
199  goto s ## b ## a ## c ## d; \
200  } \
201  else \
202  { \
203  if (cmp(*source ## d, *source ## a)) \
204  goto s ## b ## c ## a ## d; \
205  else \
206  goto s ## b ## c ## d ## a; \
207  }
208 
209  Merge4Case(0, 1, 2, 3);
210  Merge4Case(1, 2, 3, 0);
211  Merge4Case(2, 3, 0, 1);
212  Merge4Case(3, 0, 1, 2);
213 
214  Merge4Case(0, 3, 1, 2);
215  Merge4Case(3, 1, 2, 0);
216  Merge4Case(1, 2, 0, 3);
217  Merge4Case(2, 0, 3, 1);
218 
219  Merge4Case(0, 2, 3, 1);
220  Merge4Case(2, 3, 1, 0);
221  Merge4Case(3, 1, 0, 2);
222  Merge4Case(1, 0, 2, 3);
223 
224  Merge4Case(2, 0, 1, 3);
225  Merge4Case(0, 1, 3, 2);
226  Merge4Case(1, 3, 2, 0);
227  Merge4Case(3, 2, 0, 1);
228 
229  Merge4Case(3, 0, 2, 1);
230  Merge4Case(0, 2, 1, 3);
231  Merge4Case(2, 1, 3, 0);
232  Merge4Case(1, 3, 0, 2);
233 
234  Merge4Case(1, 0, 3, 2);
235  Merge4Case(0, 3, 2, 1);
236  Merge4Case(3, 2, 1, 0);
237  Merge4Case(2, 1, 0, 3);
238 
239 #undef StartMerge4
240 #undef Merge4Case
241 }
242 
243 ////////////////////////////////////////////////////////////////////////////////
244 // Loser tree data structure from Knuth, "Sorting and Searching", Section 5.4.1
245 
246 /*!
247  * Loser tree from Knuth, "Sorting and Searching", Section 5.4.1
248  * \tparam Arity maximum arity of merger, does not need to be a power of 2
249  */
250 template <class ArraysType, class CompareType, unsigned Arity>
251 class loser_tree : private noncopyable
252 {
253 public:
254  //! type of arrays container linked with loser tree
255  typedef ArraysType arrays_type;
256  //! comparator object type
257  typedef CompareType compare_type;
258 
259  // arity_bound / 2 < arity <= arity_bound
260  enum { arity = Arity, max_arity = 1UL << (LOG2<Arity>::ceil) };
261 
262  //! type of values stored in the arrays container
263  typedef typename arrays_type::value_type value_type;
264  //! type of the ordered sequences in the arrays container
265  typedef typename arrays_type::sequence_type sequence_type;
266 
267 public:
268  //! the comparator object
270 
271  //! current tree size, invariant (k == 1 << logK), always a power of two
272  unsigned_type k;
273  //! log of current tree size
274  unsigned_type logK;
275 
276  // only entries 0 .. arity-1 may hold actual sequences, the other
277  // entries arity .. max_arity-1 are sentinels to make the size of the tree
278  // a power of 2 always
279 
280 protected:
281  //! reference to the linked arrays
282  arrays_type& arrays;
283 
284  //! type of nodes in the loser tree
285  struct Entry
286  {
287  value_type key; //!< Key of Loser element (winner for 0)
288  unsigned_type index; //!< number of losing segment
289  };
290 
291  //! levels of loser tree: entry[0] contains the winner info
292  struct Entry entry[max_arity];
293 
294  //! stack of free player indices
296 
297 public:
298  loser_tree(const compare_type& c, arrays_type& a)
299  : cmp(c), k(1), logK(0), arrays(a)
300  {
301  // verify strict weak ordering
302  assert(!cmp(cmp.min_value(), cmp.min_value()));
303  }
304 
305  void initialize()
306  {
307  // initial state: one empty player slot
308  free_slots.push(0);
309 
310  rebuild_loser_tree();
311 
312  assert(arrays.is_array_empty(0) && !arrays.is_array_allocated(0));
313  }
314 
315  //! True if a is the sentinel value
316  bool is_sentinel(const value_type& a) const
317  {
318  return !(cmp(cmp.min_value(), a)); // a <= cmp.min_value()
319  }
320 
321  //! Allocate a free slot for a new player.
323  {
324  // get a free slot
325  if (free_slots.empty()) {
326  // tree is too small, attempt to enlarge
327  double_k();
328  }
329 
330  assert(!free_slots.empty());
331  unsigned_type index = free_slots.top();
332  free_slots.pop();
333 
334  return index;
335  }
336 
337  //! Free a finished player's slot
339  {
340  // push on the stack of free segment indices
341  free_slots.push(slot);
342  }
343 
344  //! Whether there is still space for new array
345  bool is_space_available() const
346  {
347  return (k < arity) || !free_slots.empty();
348  }
349 
350  //! rebuild loser tree information from the values in current
352  {
353  unsigned_type winner = init_winner(1);
354  entry[0].index = winner;
355  entry[0].key = *arrays.get_array(winner);
356  }
357 
358  // given any values in the leaves this
359  // routing recomputes upper levels of the tree
360  // from scratch in linear time
361  // initialize entry[root].index and the subtree rooted there
362  // return winner index
364  {
365  if (root >= k || root >= max_arity)
366  { // leaf reached
367  return root - k;
368  }
369  else
370  {
371  unsigned_type left = init_winner(2 * root);
372  unsigned_type right = init_winner(2 * root + 1);
373  value_type lk = *arrays.get_array(left);
374  value_type rk = *arrays.get_array(right);
375  assert(root < max_arity);
376 
377  if (!(cmp(lk, rk)))
378  {
379  // right subtree looses
380  entry[root].index = right;
381  entry[root].key = rk;
382  return left;
383  }
384  else
385  {
386  entry[root].index = left;
387  entry[root].key = lk;
388  return right;
389  }
390  }
391  }
392 
393  /*!
394  * Update loser tree on insert or decrement of player index first go up the
395  * tree all the way to the root hand down old winner for the respective
396  * subtree based on new value, and old winner and loser update each node on
397  * the path to the root top down. This is implemented recursively
398  */
400  const value_type& newKey, unsigned_type newIndex,
401  value_type* winner_key,
402  unsigned_type* winner_index, // old winner
403  unsigned_type* mask) // 1 << (ceil(log KNK) - dist-from-root)
404  {
405  if (node == 0)
406  {
407  // winner part of root
408  *mask = (unsigned_type)(1) << (logK - 1);
409  *winner_key = entry[0].key;
410  *winner_index = entry[0].index;
411  if (cmp(entry[node].key, newKey))
412  {
413  entry[node].key = newKey;
414  entry[node].index = newIndex;
415  }
416  }
417  else
418  {
419  update_on_insert(node >> 1, newKey, newIndex, winner_key, winner_index, mask);
420  value_type loserKey = entry[node].key;
421  unsigned_type loserIndex = entry[node].index;
422  if ((*winner_index & *mask) != (newIndex & *mask)) { // different subtrees
423  // newKey will have influence here
424  if (cmp(loserKey, newKey)) {
425  if (cmp(*winner_key, newKey)) {
426  // old winner loses here
427  entry[node].key = *winner_key;
428  entry[node].index = *winner_index;
429  }
430  else {
431  // new entry loses here
432  entry[node].key = newKey;
433  entry[node].index = newIndex;
434  }
435  }
436  *winner_key = loserKey;
437  *winner_index = loserIndex;
438  }
439  // note that nothing needs to be done if the winner came from the
440  // same subtree
441  // a) newKey <= winner_key => even more reason for the other tree to lose
442  // b) newKey > winner_key => the old winner will beat the new
443  // entry further down the tree
444  // also the same old winner is handed down the tree
445 
446  *mask >>= 1; // next level
447  }
448  }
449 
450  //! Initial call to recursive update_on_insert
452  const value_type& newKey, unsigned_type newIndex)
453  {
454  value_type dummyKey;
455  unsigned_type dummyIndex, dummyMask;
456  update_on_insert(node, newKey, newIndex,
457  &dummyKey, &dummyIndex, &dummyMask);
458  }
459 
460  //! make the tree twice as wide
461  void double_k()
462  {
463  STXXL_VERBOSE1("double_k (before) k=" << k << " logK=" << logK << " arity=" << arity << " max_arity=" << max_arity << " #free=" << free_slots.size());
464  assert(k > 0);
465  assert(k < arity);
466  assert(free_slots.empty()); // stack was free (probably not needed)
467 
468  // make all new entries free and push them on the free stack
469  for (unsigned_type i = 2 * k - 1; i >= k; i--) //backwards
470  {
471  arrays.make_array_sentinel(i);
472  if (i < arity)
473  free_slots.push(i);
474  }
475 
476  // double the size
477  k *= 2;
478  logK++;
479 
480  STXXL_VERBOSE1("double_k (after) k=" << k << " logK=" << logK << " arity=" << arity << " max_arity=" << max_arity << " #free=" << free_slots.size());
481  assert(!free_slots.empty());
482  assert(k <= max_arity);
483 
484  // recompute loser tree information
485  rebuild_loser_tree();
486  }
487 
488  //! compact nonempty segments in the left half of the tree
490  {
491  STXXL_VERBOSE3("compact_tree (before) k=" << k << " logK=" << logK << " #free=" << free_slots.size());
492  assert(logK > 0);
493 
494  // compact all nonempty segments to the left
495  unsigned_type last_empty = 0;
496  for (unsigned_type pos = 0; pos < k; pos++)
497  {
498  if (!arrays.is_array_empty(pos))
499  {
500  assert(arrays.is_array_allocated(pos));
501  if (pos != last_empty)
502  {
503  assert(!arrays.is_array_allocated(last_empty));
504  arrays.swap_arrays(last_empty, pos);
505  }
506  ++last_empty;
507  }
508  /*
509  else
510  {
511  if(segment[pos])
512  {
513  STXXL_VERBOSE2("int_arrays::compact_tree() deleting segment "<<pos<<
514  " address: "<<segment[pos]<<" size: "<<segment_size[pos]);
515  delete [] segment[pos];
516  segment[pos] = 0;
517  mem_cons_ -= segment_size[pos];
518  }
519  }*/
520  }
521 
522  // half degree as often as possible
523  while ((k > 1) && last_empty <= (k / 2))
524  {
525  k /= 2;
526  logK--;
527  }
528 
529  // overwrite garbage and compact the stack of free segment indices
530  free_slots.clear(); // none free
531  for ( ; last_empty < k; last_empty++)
532  {
533  assert(!arrays.is_array_allocated(last_empty));
534  arrays.make_array_sentinel(last_empty);
535  if (last_empty < arity)
536  free_slots.push(last_empty);
537  }
538 
539  STXXL_VERBOSE3("compact_tree (after) k=" << k << " logK=" << logK << " #free=" << free_slots.size());
540 
541  // recompute loser tree information
542  rebuild_loser_tree();
543  }
544 
545  //! compact tree if it got considerably smaller
547  {
548  const unsigned_type num_segments_used = k - free_slots.size();
549  const unsigned_type num_segments_trigger = k - (3 * k / 5);
550  // using k/2 would be worst case inefficient (for large k)
551  // for k \in {2, 4, 8} the trigger is k/2 which is good
552  // because we have special mergers for k \in {1, 2, 4}
553  // there is also a special 3-way-merger, that will be
554  // triggered if k == 4 && is_array_atsentinel(3)
555  STXXL_VERBOSE3("int_merger compact? k=" << k << " #used=" << num_segments_used
556  << " <= #trigger=" << num_segments_trigger << " ==> "
557  << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ")
558  << " || "
559  << ((k == 4 && !free_slots.empty() && !arrays.is_array_empty(3)) ? "yes" : "no ")
560  << " #free=" << free_slots.size());
561  if (k > 1 &&
562  ((num_segments_used <= num_segments_trigger) ||
563  (k == 4 && !free_slots.empty() && !arrays.is_array_empty(3))))
564  {
565  compact_tree();
566  }
567  }
568 
569  //! multi-merge for arbitrary K
570  template <class OutputIterator>
571  void multi_merge_k(OutputIterator begin, OutputIterator end)
572  {
573  Entry* current_pos;
574  value_type current_key;
575  unsigned_type current_index; // leaf pointed to by current entry
576  unsigned_type winner_index = entry[0].index;
577  value_type winner_key = entry[0].key;
578 
579  while (begin != end)
580  {
581  // write result
582  *begin++ = *arrays.get_array(winner_index);
583 
584  // advance winner segment
585  ++(arrays.get_array(winner_index));
586 
587  winner_key = *arrays.get_array(winner_index);
588 
589  // remove winner segment if empty now
590  if (is_sentinel(winner_key))
591  arrays.free_array(winner_index);
592 
593  // go up the entry-tree
594  for (unsigned_type i = (winner_index + k) >> 1; i > 0; i >>= 1)
595  {
596  current_pos = entry + i;
597  current_key = current_pos->key;
598  if (cmp(winner_key, current_key))
599  {
600  current_index = current_pos->index;
601  current_pos->key = winner_key;
602  current_pos->index = winner_index;
603  winner_key = current_key;
604  winner_index = current_index;
605  }
606  }
607  }
608  entry[0].index = winner_index;
609  entry[0].key = winner_key;
610  }
611 
612  template <int LogK, typename OutputIterator>
613  void multi_merge_f(OutputIterator begin, OutputIterator end)
614  {
615  unsigned_type winner_index = entry[0].index;
616  value_type winner_key = entry[0].key;
617 
618  // TODO: reinsert assert(log_k >= LogK);
619  while (begin != end)
620  {
621  // write result
622  *begin++ = *arrays.get_array(winner_index);
623 
624  // advance winner segment
625  ++(arrays.get_array(winner_index));
626 
627  winner_key = *arrays.get_array(winner_index);
628 
629  // remove winner segment if empty now
630  if (is_sentinel(winner_key))
631  arrays.free_array(winner_index);
632 
633  // update loser tree
634 #define TreeStep(L) \
635  if (1 << LogK >= 1 << L) { \
636  int pos_shift = ((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0; \
637  Entry* pos = entry + ((winner_index + (1 << LogK)) >> pos_shift); \
638  value_type key = pos->key; \
639  if (cmp(winner_key, key)) { \
640  unsigned_type index = pos->index; \
641  pos->key = winner_key; \
642  pos->index = winner_index; \
643  winner_key = key; \
644  winner_index = index; \
645  } \
646  }
647  TreeStep(10);
648  TreeStep(9);
649  TreeStep(8);
650  TreeStep(7);
651  TreeStep(6);
652  TreeStep(5);
653  TreeStep(4);
654  TreeStep(3);
655  TreeStep(2);
656  TreeStep(1);
657 #undef TreeStep
658  }
659  entry[0].index = winner_index;
660  entry[0].key = winner_key;
661  }
662 
663  //! extract the (length = end - begin) smallest elements and write them to
664  //! [begin..end) empty segments are deallocated. Requires: there are at
665  //! least length elements and segments are ended by sentinels.
666  template <class OutputIterator>
667  void multi_merge(OutputIterator begin, OutputIterator end)
668  {
669  int_type length = end - begin;
670 
671  STXXL_VERBOSE3("multi_merge(length=" << length << ") from sequences k=" << k);
672 
673  if (begin == end)
674  return;
675 
676  assert(k > 0);
677 
678  // This is the place to make statistics about external multi_merge calls.
679 
680  arrays.prefetch_arrays();
681 
682  switch (logK) {
683  case 0: {
684  assert(k == 1);
685  assert(entry[0].index == 0);
686  assert(free_slots.empty());
687 
688  // in int_merger:
689  // memcpy(target, states[0], length * sizeof(value_type));
690 
691  sequence_type& seq = arrays.get_array(0);
692  for (int_type i = 0; i < length; ++i, ++seq, ++begin)
693  *begin = *seq;
694  entry[0].key = *seq;
695 
696  if (arrays.is_array_empty(0) && arrays.is_array_allocated(0))
697  arrays.free_array(0);
698 
699  break;
700  }
701  case 1:
702  assert(k == 2);
703  merge2_iterator(arrays.get_array(0), arrays.get_array(1),
704  begin, end, cmp);
705  rebuild_loser_tree();
706 
707  if (arrays.is_array_empty(0) && arrays.is_array_allocated(0))
708  arrays.free_array(0);
709 
710  if (arrays.is_array_empty(1) && arrays.is_array_allocated(1))
711  arrays.free_array(1);
712 
713  break;
714  case 2:
715  assert(k == 4);
716  if (arrays.is_array_empty(3))
717  merge3_iterator(arrays.get_array(0), arrays.get_array(1),
718  arrays.get_array(2),
719  begin, end, cmp);
720  else
721  merge4_iterator(arrays.get_array(0), arrays.get_array(1),
722  arrays.get_array(2), arrays.get_array(3),
723  begin, end, cmp);
724 
725  rebuild_loser_tree();
726 
727  if (arrays.is_array_empty(0) && arrays.is_array_allocated(0))
728  arrays.free_array(0);
729 
730  if (arrays.is_array_empty(1) && arrays.is_array_allocated(1))
731  arrays.free_array(1);
732 
733  if (arrays.is_array_empty(2) && arrays.is_array_allocated(2))
734  arrays.free_array(2);
735 
736  if (arrays.is_array_empty(3) && arrays.is_array_allocated(3))
737  arrays.free_array(3);
738 
739  break;
740  case 3: multi_merge_f<3>(begin, end);
741  break;
742  case 4: multi_merge_f<4>(begin, end);
743  break;
744  case 5: multi_merge_f<5>(begin, end);
745  break;
746  case 6: multi_merge_f<6>(begin, end);
747  break;
748  case 7: multi_merge_f<7>(begin, end);
749  break;
750  case 8: multi_merge_f<8>(begin, end);
751  break;
752  case 9: multi_merge_f<9>(begin, end);
753  break;
754  case 10: multi_merge_f<10>(begin, end);
755  break;
756  default: multi_merge_k(begin, end);
757  break;
758  }
759 
760  maybe_compact();
761 
762  //std::copy(target,target + length,std::ostream_iterator<ValueType>(std::cout, "\n"));
763  }
764 
765  void swap(loser_tree& obj)
766  {
767  std::swap(free_slots, obj.free_slots);
768  swap_1D_arrays(entry, obj.entry, max_arity);
769  }
770 };
771 
772 #if STXXL_PARALLEL && (STXXL_PARALLEL_PQ_MULTIWAY_MERGE_EXTERNAL || STXXL_PARALLEL_PQ_MULTIWAY_MERGE_INTERNAL)
773 /*!
774  * Adapter from loser_tree to parallel merger
775  * __gnu_parallel::multiway_merge. This class holds most attributes of
776  * loser_tree, except for the tree itself: it thus basically only manages an
777  * array of sequence.
778  * \tparam Arity maximum arity of merger, does not need to be a power of 2
779  */
780 template <class ArraysType, class CompareType, unsigned Arity>
781 class parallel_merger_adapter : private noncopyable
782 {
783 public:
784  //! type of arrays container linked with loser tree
785  typedef ArraysType arrays_type;
786  //! comparator object type
787  typedef CompareType compare_type;
788 
789  // arity_bound / 2 < arity <= arity_bound
790  enum { arity = Arity, max_arity = 1UL << (LOG2<Arity>::ceil) };
791 
792  //! type of values stored in the arrays container
793  typedef typename arrays_type::value_type value_type;
794  //! type of the ordered sequences in the arrays container
795  typedef typename arrays_type::sequence_type sequence_type;
796 
797 public:
798  //! the comparator object
799  compare_type cmp;
800 
801  //! current tree size, invariant (k == 1 << logK), always a power of two
802  unsigned_type k;
803  //! log of current tree size
804  unsigned_type logK;
805 
806 protected:
807  //! reference to the linked arrays
808  arrays_type& arrays;
809 
810  //! stack of free player indices
811  internal_bounded_stack<unsigned_type, arity> free_slots;
812 
813 public:
814  parallel_merger_adapter(const compare_type& c, arrays_type& a)
815  : cmp(c), k(1), logK(0), arrays(a)
816  {
817  // verify strict weak ordering
818  assert(!cmp(cmp.min_value(), cmp.min_value()));
819  }
820 
821  void initialize()
822  {
823  // initial state: one empty player slot
824  free_slots.push(0);
825  }
826 
827  //! True if a is the sentinel value
828  bool is_sentinel(const value_type& a) const
829  {
830  return !(cmp(cmp.min_value(), a)); // a <= cmp.min_value()
831  }
832 
833  //! Allocate a free slot for a new player.
834  unsigned_type new_player()
835  {
836  // get a free slot
837  if (free_slots.empty()) {
838  // tree is too small, attempt to enlarge
839  double_k();
840  }
841 
842  assert(!free_slots.empty());
843  unsigned_type index = free_slots.top();
844  free_slots.pop();
845 
846  return index;
847  }
848 
849  //! Free a finished player's slot
850  void free_player(unsigned_type slot)
851  {
852  free_slots.push(slot);
853  }
854 
855  //! Whether there is still space for new array
856  bool is_space_available() const
857  {
858  return (k < arity) || !free_slots.empty();
859  }
860 
861  //! Initial call to recursive update_on_insert
862  void update_on_insert(unsigned_type /* node */,
863  const value_type& /* newKey */,
864  unsigned_type /* newIndex */)
865  { }
866 
867  //! make the tree twice as wide
868  void double_k()
869  {
870  STXXL_VERBOSE1("double_k (before) k=" << k << " logK=" << logK << " arity=" << arity << " max_arity=" << max_arity << " #free=" << free_slots.size());
871  assert(k > 0);
872  assert(k < arity);
873  assert(free_slots.empty()); // stack was free (probably not needed)
874 
875  // make all new entries free and push them on the free stack
876  for (unsigned_type i = 2 * k - 1; i >= k; i--) //backwards
877  {
878  arrays.make_array_sentinel(i);
879  if (i < arity)
880  free_slots.push(i);
881  }
882 
883  // double the size
884  k *= 2;
885  logK++;
886 
887  STXXL_VERBOSE1("double_k (after) k=" << k << " logK=" << logK << " arity=" << arity << " max_arity=" << max_arity << " #free=" << free_slots.size());
888  assert(!free_slots.empty());
889  assert(k <= max_arity);
890  }
891 
892  //! compact nonempty segments in the left half of the tree
893  void compact_tree()
894  {
895  STXXL_VERBOSE3("compact_tree (before) k=" << k << " logK=" << logK << " #free=" << free_slots.size());
896  assert(logK > 0);
897 
898  // compact all nonempty segments to the left
899  unsigned_type last_empty = 0;
900  for (unsigned_type pos = 0; pos < k; pos++)
901  {
902  if (!arrays.is_array_empty(pos))
903  {
904  assert(arrays.is_array_allocated(pos));
905  if (pos != last_empty)
906  {
907  assert(!arrays.is_array_allocated(last_empty));
908  arrays.swap_arrays(last_empty, pos);
909  }
910  ++last_empty;
911  }
912  /*
913  else
914  {
915  if(segment[pos])
916  {
917  STXXL_VERBOSE2("int_arrays::compact_tree() deleting segment "<<pos<<
918  " address: "<<segment[pos]<<" size: "<<segment_size[pos]);
919  delete [] segment[pos];
920  segment[pos] = 0;
921  mem_cons_ -= segment_size[pos];
922  }
923  }*/
924  }
925 
926  // half degree as often as possible
927  while ((k > 1) && last_empty <= (k / 2))
928  {
929  k /= 2;
930  logK--;
931  }
932 
933  // overwrite garbage and compact the stack of free segment indices
934  free_slots.clear(); // none free
935  for ( ; last_empty < k; last_empty++)
936  {
937  assert(!arrays.is_array_allocated(last_empty));
938  arrays.make_array_sentinel(last_empty);
939  if (last_empty < arity)
940  free_slots.push(last_empty);
941  }
942 
943  STXXL_VERBOSE3("compact_tree (after) k=" << k << " logK=" << logK << " #free=" << free_slots.size());
944  }
945 
946  //! compact tree if it got considerably smaller
947  void maybe_compact()
948  {
949  const unsigned_type num_segments_used = k - free_slots.size();
950  const unsigned_type num_segments_trigger = k - (3 * k / 5);
951  // using k/2 would be worst case inefficient (for large k)
952  // for k \in {2, 4, 8} the trigger is k/2 which is good
953  // because we have special mergers for k \in {1, 2, 4}
954  // there is also a special 3-way-merger, that will be
955  // triggered if k == 4 && is_array_atsentinel(3)
956  STXXL_VERBOSE3("int_merger compact? k=" << k << " #used=" << num_segments_used
957  << " <= #trigger=" << num_segments_trigger << " ==> "
958  << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ")
959  << " || "
960  << ((k == 4 && !free_slots.empty() && !arrays.is_array_empty(3)) ? "yes" : "no ")
961  << " #free=" << free_slots.size());
962  if (k > 1 &&
963  ((num_segments_used <= num_segments_trigger) ||
964  (k == 4 && !free_slots.empty() && !arrays.is_array_empty(3))))
965  {
966  compact_tree();
967  }
968  }
969 
970  void swap(parallel_merger_adapter& obj)
971  {
972  std::swap(free_slots, obj.free_slots);
973  }
974 };
975 #endif // STXXL_PARALLEL && (STXXL_PARALLEL_PQ_MULTIWAY_MERGE_EXTERNAL || STXXL_PARALLEL_PQ_MULTIWAY_MERGE_INTERNAL)
976 
977 } // namespace priority_queue_local
978 
979 //! \}
980 
982 
983 #endif // !STXXL_CONTAINERS_PQ_MERGERS_HEADER
984 // vim: et:ts=4:sw=4
void multi_merge_f(OutputIterator begin, OutputIterator end)
Definition: pq_mergers.h:613
void double_k()
make the tree twice as wide
Definition: pq_mergers.h:461
arrays_type & arrays
reference to the linked arrays
Definition: pq_mergers.h:282
#define StartMerge4(a, b, c, d)
bool is_space_available() const
Whether there is still space for new array.
Definition: pq_mergers.h:345
void maybe_compact()
compact tree if it got considerably smaller
Definition: pq_mergers.h:546
arrays_type::sequence_type sequence_type
type of the ordered sequences in the arrays container
Definition: pq_mergers.h:265
void update_on_insert(unsigned_type node, const value_type &newKey, unsigned_type newIndex)
Initial call to recursive update_on_insert.
Definition: pq_mergers.h:451
void multi_merge_k(OutputIterator begin, OutputIterator end)
multi-merge for arbitrary K
Definition: pq_mergers.h:571
void merge3_iterator(InputIterator &source0, InputIterator &source1, InputIterator &source2, OutputIterator target, OutputIterator end, CompareType &cmp)
Definition: pq_mergers.h:71
#define TreeStep(L)
void swap_1D_arrays(Type *a, Type *b, unsigned_type size)
Definition: utils.h:246
arrays_type::value_type value_type
type of values stored in the arrays container
Definition: pq_mergers.h:263
Loser tree from Knuth, &quot;Sorting and Searching&quot;, Section 5.4.1.
Definition: pq_losertree.h:38
void free_player(unsigned_type slot)
Free a finished player&#39;s slot.
Definition: pq_mergers.h:338
CompareType compare_type
comparator object type
Definition: pq_mergers.h:257
void merge4_iterator(InputIterator &source0, InputIterator &source1, InputIterator &source2, InputIterator &source3, OutputIterator target, OutputIterator end, CompareType &cmp)
Definition: pq_mergers.h:138
#define STXXL_VERBOSE3(x)
Definition: verbose.h:127
choose_int_types< my_pointer_size >::int_type int_type
Definition: types.h:63
loser_tree(const compare_type &c, arrays_type &a)
Definition: pq_mergers.h:298
void update_on_insert(unsigned_type node, const value_type &newKey, unsigned_type newIndex, value_type *winner_key, unsigned_type *winner_index, unsigned_type *mask)
Update loser tree on insert or decrement of player index first go up the tree all the way to the root...
Definition: pq_mergers.h:399
compare_type cmp
the comparator object
Definition: pq_mergers.h:269
#define STXXL_BEGIN_NAMESPACE
Definition: namespace.h:16
struct Entry entry[max_arity]
levels of loser tree: entry[0] contains the winner info
Definition: pq_mergers.h:292
#define STXXL_VERBOSE1(x)
Definition: verbose.h:113
unsigned_type index
number of losing segment
Definition: pq_mergers.h:288
value_type key
Key of Loser element (winner for 0)
Definition: pq_mergers.h:287
void compact_tree()
compact nonempty segments in the left half of the tree
Definition: pq_mergers.h:489
type of nodes in the loser tree
Definition: pq_mergers.h:285
unsigned_type new_player()
Allocate a free slot for a new player.
Definition: pq_mergers.h:322
unsigned_type init_winner(unsigned_type root)
Definition: pq_mergers.h:363
choose_int_types< my_pointer_size >::unsigned_type unsigned_type
Definition: types.h:64
#define Merge4Case(a, b, c, d)
internal_bounded_stack< unsigned_type, MaxArity > free_slots
Definition: pq_losertree.h:57
void multi_merge(OutputIterator begin, OutputIterator end)
extract the (length = end - begin) smallest elements and write them to [begin..end) empty segments ar...
Definition: pq_mergers.h:667
bool is_sentinel(const value_type &a) const
True if a is the sentinel value.
Definition: pq_mergers.h:316
void rebuild_loser_tree()
rebuild loser tree information from the values in current
Definition: pq_mergers.h:351
#define Merge3Case(a, b, c)
ArraysType arrays_type
type of arrays container linked with loser tree
Definition: pq_mergers.h:255
internal_bounded_stack< unsigned_type, arity > free_slots
stack of free player indices
Definition: pq_mergers.h:295
void merge2_iterator(InputIterator &source0, InputIterator &source1, OutputIterator target, OutputIterator end, CompareType &cmp)
Definition: pq_mergers.h:41
#define STXXL_END_NAMESPACE
Definition: namespace.h:17