STXXL  1.4-dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
multiway_merge.h
Go to the documentation of this file.
1 /***************************************************************************
2  * include/stxxl/bits/parallel/multiway_merge.h
3  *
4  * Implementation of sequential and parallel multiway merge.
5  * Extracted from MCSTL - http://algo2.iti.uni-karlsruhe.de/singler/mcstl/
6  *
7  * Part of the STXXL. See http://stxxl.sourceforge.net
8  *
9  * Copyright (C) 2007 Johannes Singler <[email protected]>
10  * Copyright (C) 2014 Timo Bingmann <[email protected]>
11  *
12  * Distributed under the Boost Software License, Version 1.0.
13  * (See accompanying file LICENSE_1_0.txt or copy at
14  * http://www.boost.org/LICENSE_1_0.txt)
15  **************************************************************************/
16 
17 #ifndef STXXL_PARALLEL_MULTIWAY_MERGE_HEADER
18 #define STXXL_PARALLEL_MULTIWAY_MERGE_HEADER
19 
20 #include <vector>
21 #include <iterator>
22 #include <algorithm>
23 
24 #include <stxxl/bits/verbose.h>
34 
35 #if defined(_MSC_VER) && STXXL_DEBUG_ASSERTIONS
36 #include <BaseTsd.h>
37 typedef SSIZE_T ssize_t;
38 #endif
39 
41 
42 namespace parallel {
43 
44 //! Length of a sequence described by a pair of iterators.
45 template <typename RandomAccessIteratorPair>
46 typename std::iterator_traits<
47  typename RandomAccessIteratorPair::first_type
48  >::difference_type
49 iterpair_size(const RandomAccessIteratorPair& p)
50 {
51  return p.second - p.first;
52 }
53 
54 /*!
55  * Iterator wrapper supporting an implicit supremum at the end of the sequence,
56  * dominating all comparisons. Deriving from RandomAccessIterator is not
57  * possible since RandomAccessIterator need not be a class.
58 */
59 template <typename RandomAccessIterator, typename Comparator>
61 {
62 public:
63  //! Our own type
65 
66  //! Value type of the iterator
67  typedef typename std::iterator_traits<RandomAccessIterator>::value_type value_type;
68 
69 protected:
70  //! Current iterator position.
71  RandomAccessIterator current;
72  //! End iterator of the sequence.
73  RandomAccessIterator end;
74  //! Comparator.
75  Comparator& comp;
76 
77 public:
78  /*!
79  * Constructor. Sets iterator to beginning of sequence.
80  * \param begin Begin iterator of sequence.
81  * \param end End iterator of sequence.
82  * \param comp Comparator provided for associated overloaded compare
83  * operators.
84  */
85  guarded_iterator(RandomAccessIterator begin, RandomAccessIterator end,
86  Comparator& comp)
87  : current(begin), end(end), comp(comp)
88  { }
89 
90  /*!
91  * Pre-increment operator.
92  * \return This.
93  */
95  {
96  ++current;
97  return *this;
98  }
99 
100  /*!
101  * Dereference operator.
102  * \return Referenced element.
103  */
104  value_type& operator * ()
105  {
106  return *current;
107  }
108 
109  /*!
110  * Convert to wrapped iterator.
111  * \return Wrapped iterator.
112  */
113  RandomAccessIterator & iterator()
114  {
115  return current;
116  }
117 
118  /*!
119  * Compare two elements referenced by guarded iterators.
120  * \param bi1 First iterator.
121  * \param bi2 Second iterator.
122  * \return \c True if less.
123  */
124  friend bool operator < (self_type& bi1, self_type& bi2)
125  {
126  if (bi1.current == bi1.end) // bi1 is sup
127  return bi2.current == bi2.end; // bi2 is not sup
128  if (bi2.current == bi2.end) // bi2 is sup
129  return true;
130  return bi1.comp(*bi1, *bi2); // normal compare
131  }
132 
133  /*!
134  * Compare two elements referenced by guarded iterators.
135  * \param bi1 First iterator.
136  * \param bi2 Second iterator.
137  * \return \c True if less equal.
138  */
139  friend bool operator <= (self_type& bi1, self_type& bi2)
140  {
141  if (bi2.current == bi2.end) //bi1 is sup
142  return bi1.current != bi1.end; //bi2 is not sup
143  if (bi1.current == bi1.end) //bi2 is sup
144  return false;
145  return !bi1.comp(*bi2, *bi1); //normal compare
146  }
147 };
148 
149 template <typename RandomAccessIterator, typename Comparator>
151 {
152 public:
153  //! Our own type
155 
156  //! Value type of the iterator
157  typedef typename std::iterator_traits<RandomAccessIterator>::value_type value_type;
158 
159 protected:
160  //! Current iterator position.
161  RandomAccessIterator current;
162  //! Comparator.
163  Comparator& comp;
164 
165 public:
166  /*!
167  * Constructor. Sets iterator to beginning of sequence.
168  * \param begin Begin iterator of sequence.
169  * param end Unused, only for compatibility.
170  * \param comp Unused, only for compatibility.
171  */
172  unguarded_iterator(RandomAccessIterator begin,
173  RandomAccessIterator /* end */,
174  Comparator& comp)
175  : current(begin), comp(comp)
176  { }
177 
178  /*!
179  * Pre-increment operator.
180  * \return This.
181  */
183  {
184  ++current;
185  return *this;
186  }
187 
188  /*!
189  * Dereference operator.
190  * \return Referenced element.
191  */
192  value_type& operator * ()
193  {
194  return *current;
195  }
196 
197  /*!
198  * Convert to wrapped iterator.
199  * \return Wrapped iterator.
200  */
201  RandomAccessIterator & iterator()
202  {
203  return current;
204  }
205 
206  /*!
207  * Compare two elements referenced by unguarded iterators.
208  * \param bi1 First iterator.
209  * \param bi2 Second iterator.
210  * \return \c True if less.
211  */
212  friend bool operator < (self_type& bi1, self_type& bi2)
213  {
214  return bi1.comp(*bi1, *bi2); // normal compare, unguarded
215  }
216 
217  /*!
218  * Compare two elements referenced by unguarded iterators.
219  * \param bi1 First iterator.
220  * \param bi2 Second iterator.
221  * \return \c True if less equal.
222  */
223  friend bool operator <= (self_type& bi1, self_type& bi2)
224  {
225  return !bi1.comp(*bi2, *bi1); // normal compare, unguarded
226  }
227 };
228 
229 /*!
230  * Prepare a set of sequences to be merged without a (end) guard
231  *
232  * \param seqs_begin
233  * \param seqs_end
234  * \param comp
235  * \param min_sequence
236  * \tparam Stable
237  * \pre (seqs_end - seqs_begin > 0)
238  */
239 template <bool Stable, typename RandomAccessIteratorIterator, typename Comparator>
240 typename std::iterator_traits<
241  typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
242  >::difference_type
243 prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
244  RandomAccessIteratorIterator seqs_end,
245  Comparator comp,
246  int& min_sequence)
247 {
248  STXXL_PARALLEL_PCALL(seqs_end - seqs_begin);
249 
250  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
251  ::value_type::first_type RandomAccessIterator;
252  typedef typename std::iterator_traits<RandomAccessIterator>
253  ::value_type value_type;
254  typedef typename std::iterator_traits<RandomAccessIterator>
255  ::difference_type diff_type;
256 
257  if ((*seqs_begin).first == (*seqs_begin).second)
258  {
259  // empty sequence found, it's the first one
260  min_sequence = 0;
261  return -1;
262  }
263 
264  // last element in sequence
265  value_type min = *((*seqs_begin).second - 1);
266  min_sequence = 0;
267  for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s)
268  {
269  if ((*s).first == (*s).second)
270  {
271  // empty sequence found
272  min_sequence = static_cast<int>(s - seqs_begin);
273  return -1;
274  }
275  const value_type& v = *((*s).second - 1);
276  if (comp(v, min))
277  {
278  // last element in sequence is strictly smaller
279  min = v;
280  min_sequence = static_cast<int>(s - seqs_begin);
281  }
282  }
283 
284  diff_type overhang_size = 0;
285 
286  int s = 0;
287  for (s = 0; s <= min_sequence; ++s)
288  {
289  RandomAccessIterator split;
290  if (Stable)
291  split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
292  min, comp);
293  else
294  split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
295  min, comp);
296 
297  overhang_size += seqs_begin[s].second - split;
298  }
299 
300  for ( ; s < (seqs_end - seqs_begin); ++s)
301  {
302  RandomAccessIterator split =
303  std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
304  min, comp);
305  overhang_size += seqs_begin[s].second - split;
306  }
307 
308  return overhang_size; // so many elements will be left over afterwards
309 }
310 
311 /*!
312  * Prepare a set of sequences to be merged with a (end) guard (sentinel)
313  * \param seqs_begin
314  * \param seqs_end
315  * \param comp
316  */
317 template <typename RandomAccessIteratorIterator, typename Comparator>
318 typename std::iterator_traits<
319  typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
320  >::difference_type
321 prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
322  RandomAccessIteratorIterator seqs_end,
323  Comparator comp)
324 {
325  STXXL_PARALLEL_PCALL(seqs_end - seqs_begin);
326 
327  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
328  ::value_type::first_type RandomAccessIterator;
329  typedef typename std::iterator_traits<RandomAccessIterator>
330  ::value_type value_type;
331  typedef typename std::iterator_traits<RandomAccessIterator>
332  ::difference_type diff_type;
333 
334  value_type* max_value = NULL; // last element in sequence
335  for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
336  {
337  if ((*s).first == (*s).second)
338  continue;
339  value_type& v = *((*s).second - 1); //last element in sequence
340  if (!max_value || comp(*max_value, v)) //strictly greater
341  max_value = &v;
342  }
343 
344  diff_type overhang_size = 0;
345 
346  for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
347  {
348  RandomAccessIterator split = std::lower_bound((*s).first, (*s).second, *max_value, comp);
349  overhang_size += (*s).second - split;
350  *((*s).second) = *max_value; //set sentinel
351  }
352 
353  return overhang_size; // so many elements will be left over afterwards
354 }
355 
356 /*!
357  * Highly efficient 3-way merging procedure.
358  *
359  * Merging is done with the algorithm implementation described by Peter
360  * Sanders. Basically, the idea is to minimize the number of necessary
361  * comparison after merging an element. The implementation trick that makes
362  * this fast is that the order of the sequences is stored in the instruction
363  * pointer (translated into labels in C++).
364  *
365  * This works well for merging up to 4 sequences.
366  *
367  * Note that making the merging stable does \a not come at a performance hit.
368  *
369  * Whether the merging is done guarded or unguarded is selected by the used
370  * iterator class.
371  *
372  * \param seqs_begin Begin iterator of iterator pair input sequence.
373  * \param seqs_end End iterator of iterator pair input sequence.
374  * \param target Begin iterator out output sequence.
375  * \param length Maximum length to merge.
376  * \param comp Comparator.
377  * \return End iterator of output sequence.
378  */
379 template <template <typename RAI, typename C> class Iterator,
380  typename RandomAccessIteratorIterator,
381  typename RandomAccessIterator3,
382  typename DiffType, typename Comparator>
383 RandomAccessIterator3
384 multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin,
385  RandomAccessIteratorIterator seqs_end,
386  RandomAccessIterator3 target, DiffType length,
387  Comparator comp)
388 {
389  STXXL_PARALLEL_PCALL(length);
390  STXXL_ASSERT(seqs_end - seqs_begin == 3);
391 
392  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
393  ::value_type::first_type RandomAccessIterator;
394 
395  if (length == 0)
396  return target;
397 
398 #if STXXL_DEBUG_ASSERTIONS
399  ssize_t orig_length = length;
400 #endif
401 
402  Iterator<RandomAccessIterator, Comparator>
403  seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
404  seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
405  seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
406 
407  if (seq0 <= seq1)
408  {
409  if (seq1 <= seq2)
410  goto s012;
411  else if (seq2 < seq0)
412  goto s201;
413  else
414  goto s021;
415  }
416  else
417  {
418  if (seq1 <= seq2)
419  {
420  if (seq0 <= seq2)
421  goto s102;
422  else
423  goto s120;
424  }
425  else
426  goto s210;
427  }
428 
429 #define STXXL_MERGE3CASE(a, b, c, c0, c1) \
430  s ## a ## b ## c: \
431  *target = *seq ## a; \
432  ++target; \
433  --length; \
434  ++seq ## a; \
435  if (length == 0) goto finish; \
436  if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
437  if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
438  goto s ## b ## c ## a;
439 
440  STXXL_MERGE3CASE(0, 1, 2, <=, <=);
441  STXXL_MERGE3CASE(1, 2, 0, <=, <);
442  STXXL_MERGE3CASE(2, 0, 1, <, <);
443  STXXL_MERGE3CASE(1, 0, 2, <, <=);
444  STXXL_MERGE3CASE(0, 2, 1, <=, <=);
445  STXXL_MERGE3CASE(2, 1, 0, <, <);
446 
447 #undef STXXL_MERGE3CASE
448 
449 finish:
450  ;
451 
452 #if STXXL_DEBUG_ASSERTIONS
453  STXXL_CHECK_EQUAL((seq0.iterator() - seqs_begin[0].first) +
454  (seq1.iterator() - seqs_begin[1].first) +
455  (seq2.iterator() - seqs_begin[2].first),
456  orig_length);
457 #endif
458 
459  seqs_begin[0].first = seq0.iterator();
460  seqs_begin[1].first = seq1.iterator();
461  seqs_begin[2].first = seq2.iterator();
462 
463  return target;
464 }
465 
466 template <typename RandomAccessIteratorIterator,
467  typename RandomAccessIterator3,
468  typename DiffType, typename Comparator>
469 RandomAccessIterator3
470 multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin,
471  RandomAccessIteratorIterator seqs_end,
472  RandomAccessIterator3 target, DiffType length,
473  Comparator comp)
474 {
475  STXXL_PARALLEL_PCALL(length);
476  STXXL_ASSERT(seqs_end - seqs_begin == 3);
477 
478  int min_seq;
479  RandomAccessIterator3 target_end;
480  DiffType overhang = prepare_unguarded<true>(seqs_begin, seqs_end, comp, min_seq);
481 
482  DiffType total_length = 0;
483  for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
484  total_length += iterpair_size(*s);
485 
486  if (overhang != (DiffType)(-1))
487  {
488  DiffType unguarded_length = std::min(length, total_length - overhang);
489  target_end = multiway_merge_3_variant<unguarded_iterator>
490  (seqs_begin, seqs_end, target, unguarded_length, comp);
491  overhang = length - unguarded_length;
492  }
493  else
494  {
495  // empty sequence found
496  overhang = length;
497  target_end = target;
498  }
499 
500  STXXL_DEBUG_ASSERT(target_end == target + length - overhang);
501  STXXL_DEBUG_ASSERT(stxxl::is_sorted(target, target_end, comp));
502 
503  switch (min_seq)
504  {
505  case 0:
506  // iterators will be advanced accordingly
507  target_end = merge_advance(
508  seqs_begin[1].first, seqs_begin[1].second,
509  seqs_begin[2].first, seqs_begin[2].second,
510  target_end, overhang, comp);
511  break;
512  case 1:
513  target_end = merge_advance(
514  seqs_begin[0].first, seqs_begin[0].second,
515  seqs_begin[2].first, seqs_begin[2].second,
516  target_end, overhang, comp);
517  break;
518  case 2:
519  target_end = merge_advance(
520  seqs_begin[0].first, seqs_begin[0].second,
521  seqs_begin[1].first, seqs_begin[1].second,
522  target_end, overhang, comp);
523  break;
524  default:
525  assert(false);
526  }
527 
528  STXXL_DEBUG_ASSERT(target_end == target + length);
529  STXXL_DEBUG_ASSERT(stxxl::is_sorted(target, target_end, comp));
530 
531  return target_end;
532 }
533 
534 /*!
535  * Highly efficient 4-way merging procedure.
536  *
537  * Merging is done with the algorithm implementation described by Peter
538  * Sanders. Basically, the idea is to minimize the number of necessary
539  * comparison after merging an element. The implementation trick that makes
540  * this fast is that the order of the sequences is stored in the instruction
541  * pointer (translated into goto labels in C++).
542  *
543  * This works well for merging up to 4 sequences.
544  *
545  * Note that making the merging stable does \a not come at a performance hit.
546  *
547  * Whether the merging is done guarded or unguarded is selected by the used
548  * iterator class.
549  *
550  * \param seqs_begin Begin iterator of iterator pair input sequence.
551  * \param seqs_end End iterator of iterator pair input sequence.
552  * \param target Begin iterator out output sequence.
553  * \param length Maximum length to merge.
554  * \param comp Comparator.
555  * \return End iterator of output sequence.
556  */
557 template <template <typename RAI, typename C> class iterator,
558  typename RandomAccessIteratorIterator,
559  typename RandomAccessIterator3,
560  typename DiffType, typename Comparator>
561 RandomAccessIterator3
562 multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
563  RandomAccessIteratorIterator seqs_end,
564  RandomAccessIterator3 target, DiffType length,
565  Comparator comp)
566 {
567  STXXL_PARALLEL_PCALL(length);
568  STXXL_ASSERT(seqs_end - seqs_begin == 4);
569 
570  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
571  ::value_type::first_type RandomAccessIterator;
572 
573  if (length == 0)
574  return target;
575 
576 #if STXXL_DEBUG_ASSERTIONS
577  ssize_t orig_length = length;
578 #endif
579 
580  iterator<RandomAccessIterator, Comparator>
581  seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
582  seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
583  seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
584  seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
585 
586 #define STXXL_DECISION(a, b, c, d) do { \
587  if (seq ## d < seq ## a) goto s ## d ## a ## b ## c; \
588  if (seq ## d < seq ## b) goto s ## a ## d ## b ## c; \
589  if (seq ## d < seq ## c) goto s ## a ## b ## d ## c; \
590  goto s ## a ## b ## c ## d; \
591 } \
592  while (0)
593 
594  if (seq0 <= seq1)
595  {
596  if (seq1 <= seq2)
597  STXXL_DECISION(0, 1, 2, 3);
598  else if (seq2 < seq0)
599  STXXL_DECISION(2, 0, 1, 3);
600  else
601  STXXL_DECISION(0, 2, 1, 3);
602  }
603  else
604  {
605  if (seq1 <= seq2)
606  {
607  if (seq0 <= seq2)
608  STXXL_DECISION(1, 0, 2, 3);
609  else
610  STXXL_DECISION(1, 2, 0, 3);
611  }
612  else
613  STXXL_DECISION(2, 1, 0, 3);
614  }
615 
616 #define STXXL_MERGE4CASE(a, b, c, d, c0, c1, c2) \
617  s ## a ## b ## c ## d: \
618  if (length == 0) goto finish; \
619  *target = *seq ## a; \
620  ++target; \
621  --length; \
622  ++seq ## a; \
623  if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \
624  if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \
625  if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \
626  goto s ## b ## c ## d ## a;
627 
628  STXXL_MERGE4CASE(0, 1, 2, 3, <=, <=, <=);
629  STXXL_MERGE4CASE(0, 1, 3, 2, <=, <=, <=);
630  STXXL_MERGE4CASE(0, 2, 1, 3, <=, <=, <=);
631  STXXL_MERGE4CASE(0, 2, 3, 1, <=, <=, <=);
632  STXXL_MERGE4CASE(0, 3, 1, 2, <=, <=, <=);
633  STXXL_MERGE4CASE(0, 3, 2, 1, <=, <=, <=);
634  STXXL_MERGE4CASE(1, 0, 2, 3, <, <=, <=);
635  STXXL_MERGE4CASE(1, 0, 3, 2, <, <=, <=);
636  STXXL_MERGE4CASE(1, 2, 0, 3, <=, <, <=);
637  STXXL_MERGE4CASE(1, 2, 3, 0, <=, <=, <);
638  STXXL_MERGE4CASE(1, 3, 0, 2, <=, <, <=);
639  STXXL_MERGE4CASE(1, 3, 2, 0, <=, <=, <);
640  STXXL_MERGE4CASE(2, 0, 1, 3, <, <, <=);
641  STXXL_MERGE4CASE(2, 0, 3, 1, <, <=, <);
642  STXXL_MERGE4CASE(2, 1, 0, 3, <, <, <=);
643  STXXL_MERGE4CASE(2, 1, 3, 0, <, <=, <);
644  STXXL_MERGE4CASE(2, 3, 0, 1, <=, <, <);
645  STXXL_MERGE4CASE(2, 3, 1, 0, <=, <, <);
646  STXXL_MERGE4CASE(3, 0, 1, 2, <, <, <);
647  STXXL_MERGE4CASE(3, 0, 2, 1, <, <, <);
648  STXXL_MERGE4CASE(3, 1, 0, 2, <, <, <);
649  STXXL_MERGE4CASE(3, 1, 2, 0, <, <, <);
650  STXXL_MERGE4CASE(3, 2, 0, 1, <, <, <);
651  STXXL_MERGE4CASE(3, 2, 1, 0, <, <, <);
652 
653 #undef STXXL_MERGE4CASE
654 #undef STXXL_DECISION
655 
656 finish:
657  ;
658 
659 #if STXXL_DEBUG_ASSERTIONS
660  STXXL_CHECK_EQUAL((seq0.iterator() - seqs_begin[0].first) +
661  (seq1.iterator() - seqs_begin[1].first) +
662  (seq2.iterator() - seqs_begin[2].first) +
663  (seq3.iterator() - seqs_begin[3].first),
664  orig_length);
665 #endif
666 
667  seqs_begin[0].first = seq0.iterator();
668  seqs_begin[1].first = seq1.iterator();
669  seqs_begin[2].first = seq2.iterator();
670  seqs_begin[3].first = seq3.iterator();
671 
672  return target;
673 }
674 
675 template <typename RandomAccessIteratorIterator,
676  typename RandomAccessIterator3,
677  typename DiffType, typename Comparator>
678 RandomAccessIterator3
679 multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
680  RandomAccessIteratorIterator seqs_end,
681  RandomAccessIterator3 target, DiffType length,
682  Comparator comp)
683 {
684  STXXL_PARALLEL_PCALL(length);
685  STXXL_ASSERT(seqs_end - seqs_begin == 4);
686 
687  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
688  ::value_type RandomAccessIteratorPair;
689 
690  int min_seq;
691  RandomAccessIterator3 target_end;
692  DiffType overhang = prepare_unguarded<true>(seqs_begin, seqs_end, comp, min_seq);
693 
694  DiffType total_length = 0;
695  for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
696  total_length += iterpair_size(*s);
697 
698  if (overhang != (DiffType) - 1)
699  {
700  DiffType unguarded_length = std::min(length, total_length - overhang);
701  target_end = multiway_merge_4_variant<unguarded_iterator>
702  (seqs_begin, seqs_end, target, unguarded_length, comp);
703  overhang = length - unguarded_length;
704  }
705  else
706  {
707  // empty sequence found
708  overhang = length;
709  target_end = target;
710  }
711 
712  STXXL_DEBUG_ASSERT(target_end == target + length - overhang);
713  STXXL_DEBUG_ASSERT(stxxl::is_sorted(target, target_end, comp));
714 
715  std::vector<RandomAccessIteratorPair> one_missing(seqs_begin, seqs_end);
716  one_missing.erase(one_missing.begin() + min_seq); //remove
717 
718  target_end = multiway_merge_3_variant<guarded_iterator>(one_missing.begin(), one_missing.end(), target_end, overhang, comp);
719 
720  one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]); //insert back again
721  std::copy(one_missing.begin(), one_missing.end(), seqs_begin); //write back modified iterators
722 
723  STXXL_DEBUG_ASSERT(target_end == target + length);
724  STXXL_DEBUG_ASSERT(stxxl::is_sorted(target, target_end, comp));
725 
726  return target_end;
727 }
728 
729 /*!
730  * Basic multi-way merging procedure.
731  *
732  * The head elements are kept in a sorted array, new heads are inserted
733  * linearly.
734  *
735  * \param seqs_begin Begin iterator of iterator pair input sequence.
736  * \param seqs_end End iterator of iterator pair input sequence.
737  * \param target Begin iterator out output sequence.
738  * \param length Maximum length to merge.
739  * \param comp Comparator.
740  * \tparam Stable Stable merging incurs a performance penalty.
741  * \return End iterator of output sequence.
742  */
743 template <bool Stable,
744  typename RandomAccessIteratorIterator,
745  typename RandomAccessIterator3,
746  typename DiffType, typename Comparator>
747 RandomAccessIterator3
748 multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
749  RandomAccessIteratorIterator seqs_end,
750  RandomAccessIterator3 target, DiffType length,
751  Comparator comp)
752 {
753  STXXL_PARALLEL_PCALL(length);
754 
755  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
756  ::value_type::first_type RandomAccessIterator;
757  typedef typename std::iterator_traits<RandomAccessIterator>
758  ::value_type value_type;
759 
760  // num remaining pieces
761  int k = static_cast<int>(seqs_end - seqs_begin), nrp;
762 
763  value_type* pl = new value_type[k];
764  int* source = new int[k];
765  DiffType total_length = 0;
766 
767 #define POS(i) seqs_begin[(i)].first
768 #define STOPS(i) seqs_begin[(i)].second
769 
770  //write entries into queue
771  nrp = 0;
772  for (int pi = 0; pi < k; ++pi)
773  {
774  if (STOPS(pi) != POS(pi))
775  {
776  pl[nrp] = *(POS(pi));
777  source[nrp] = pi;
778  ++nrp;
779  total_length += iterpair_size(seqs_begin[pi]);
780  }
781  }
782 
783  if (Stable)
784  {
785  for (int k = 0; k < nrp - 1; ++k)
786  for (int pi = nrp - 1; pi > k; --pi)
787  if (comp(pl[pi], pl[pi - 1]) ||
788  (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
789  {
790  std::swap(pl[pi - 1], pl[pi]);
791  std::swap(source[pi - 1], source[pi]);
792  }
793  }
794  else
795  {
796  for (int k = 0; k < nrp - 1; ++k)
797  for (int pi = nrp - 1; pi > k; --pi)
798  if (comp(pl[pi], pl[pi - 1]))
799  {
800  std::swap(pl[pi - 1], pl[pi]);
801  std::swap(source[pi - 1], source[pi]);
802  }
803  }
804 
805  // iterate
806  if (Stable)
807  {
808  int j;
809  while (nrp > 0 && length > 0)
810  {
811  if (source[0] < source[1])
812  {
813  // pl[0] <= pl[1] ?
814  while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
815  {
816  *target = pl[0];
817  ++target;
818  ++POS(source[0]);
819  --length;
820  if (POS(source[0]) == STOPS(source[0]))
821  {
822  // move everything to the left
823  for (int s = 0; s < nrp - 1; ++s)
824  {
825  pl[s] = pl[s + 1];
826  source[s] = source[s + 1];
827  }
828  --nrp;
829  break;
830  }
831  else
832  pl[0] = *(POS(source[0]));
833  }
834  }
835  else
836  {
837  // pl[0] < pl[1] ?
838  while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
839  {
840  *target = pl[0];
841  ++target;
842  ++POS(source[0]);
843  --length;
844  if (POS(source[0]) == STOPS(source[0]))
845  {
846  for (int s = 0; s < nrp - 1; ++s)
847  {
848  pl[s] = pl[s + 1];
849  source[s] = source[s + 1];
850  }
851  --nrp;
852  break;
853  }
854  else
855  pl[0] = *(POS(source[0]));
856  }
857  }
858 
859  //sink down
860  j = 1;
861  while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
862  (!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1]))))
863  {
864  std::swap(pl[j - 1], pl[j]);
865  std::swap(source[j - 1], source[j]);
866  ++j;
867  }
868  }
869  }
870  else
871  {
872  int j;
873  while (nrp > 0 && length > 0)
874  {
875  // pl[0] <= pl[1] ?
876  while ((nrp == 1 || !comp(pl[1], pl[0])) && length > 0)
877  {
878  *target = pl[0];
879  ++target;
880  ++POS(source[0]);
881  --length;
882  if (POS(source[0]) == STOPS(source[0]))
883  {
884  for (int s = 0; s < (nrp - 1); ++s)
885  {
886  pl[s] = pl[s + 1];
887  source[s] = source[s + 1];
888  }
889  --nrp;
890  break;
891  }
892  else
893  pl[0] = *(POS(source[0]));
894  }
895 
896  //sink down
897  j = 1;
898  while ((j < nrp) && comp(pl[j], pl[j - 1]))
899  {
900  std::swap(pl[j - 1], pl[j]);
901  std::swap(source[j - 1], source[j]);
902  ++j;
903  }
904  }
905  }
906 
907  delete[] pl;
908  delete[] source;
909 
910  return target;
911 }
912 
913 /*!
914  * Multi-way merging procedure for a high branching factor, guarded case.
915  *
916  * The head elements are kept in a loser tree.
917  * \param seqs_begin Begin iterator of iterator pair input sequence.
918  * \param seqs_end End iterator of iterator pair input sequence.
919  * \param target Begin iterator out output sequence.
920  * \param length Maximum length to merge.
921  * \param comp Comparator.
922  * \tparam Stable Stable merging incurs a performance penalty.
923  * \return End iterator of output sequence.
924  */
925 template <typename LoserTreeType,
926  typename RandomAccessIteratorIterator,
927  typename RandomAccessIterator3,
928  typename DiffType, typename Comparator>
929 RandomAccessIterator3
930 multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
931  RandomAccessIteratorIterator seqs_end,
932  RandomAccessIterator3 target, DiffType length,
933  Comparator comp)
934 {
935  STXXL_PARALLEL_PCALL(length);
936 
937  typedef typename LoserTreeType::source_type source_type;
938  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
939  ::value_type::first_type RandomAccessIterator;
940  typedef typename std::iterator_traits<RandomAccessIterator>
941  ::value_type value_type;
942 
943  source_type k = static_cast<source_type>(seqs_end - seqs_begin);
944 
945  LoserTreeType lt(k, comp);
946 
947  DiffType total_length = 0;
948 
949  const value_type* arbitrary_element = NULL;
950 
951  // find an arbitrary element to avoid default construction
952  for (source_type t = 0; t < k; ++t)
953  {
954  if (!arbitrary_element && iterpair_size(seqs_begin[t]) > 0)
955  arbitrary_element = &(*seqs_begin[t].first);
956 
957  total_length += iterpair_size(seqs_begin[t]);
958  }
959 
960  for (source_type t = 0; t < k; ++t)
961  {
962  if (UNLIKELY(seqs_begin[t].first == seqs_begin[t].second))
963  lt.insert_start(*arbitrary_element, t, true);
964  else
965  lt.insert_start(*seqs_begin[t].first, t, false);
966  }
967 
968  lt.init();
969 
970  total_length = std::min(total_length, length);
971 
972  for (DiffType i = 0; i < total_length; ++i)
973  {
974  // take out
975  source_type source = lt.get_min_source();
976 
977  *target = *seqs_begin[source].first;
978  ++target;
979  ++seqs_begin[source].first;
980 
981  // feed
982  if (seqs_begin[source].first == seqs_begin[source].second)
983  lt.delete_min_insert(*arbitrary_element, true);
984  else
985  // replace from same source
986  lt.delete_min_insert(*seqs_begin[source].first, false);
987  }
988 
989  return target;
990 }
991 
992 /*!
993  * Multi-way merging procedure for a high branching factor, unguarded case.
994  * The head elements are kept in a loser tree.
995  *
996  * \param seqs_begin Begin iterator of iterator pair input sequence.
997  * \param seqs_end End iterator of iterator pair input sequence.
998  * \param target Begin iterator out output sequence.
999  * \param length Maximum length to merge.
1000  * \param comp Comparator.
1001  * \tparam Stable Stable merging incurs a performance penalty.
1002  * \return End iterator of output sequence.
1003  * \pre No input will run out of elements during the merge.
1004  */
1005 template <typename LoserTreeType,
1006  typename RandomAccessIteratorIterator,
1007  typename RandomAccessIterator3,
1008  typename DiffType,
1009  typename Comparator>
1010 RandomAccessIterator3
1012  RandomAccessIteratorIterator seqs_begin,
1013  RandomAccessIteratorIterator seqs_end,
1014  RandomAccessIterator3 target, DiffType length,
1015  Comparator comp)
1016 {
1017  STXXL_PARALLEL_PCALL(length);
1018 
1019  int k = (int)(seqs_end - seqs_begin);
1020 
1021  // sentinel is item at end of first sequence.
1022  LoserTreeType lt(k, *(seqs_begin->second - 1), comp);
1023 
1024  DiffType total_length = 0;
1025 
1026  for (int t = 0; t < k; ++t)
1027  {
1028  assert(seqs_begin[t].first != seqs_begin[t].second);
1029 
1030  lt.insert_start(*seqs_begin[t].first, t);
1031 
1032  total_length += iterpair_size(seqs_begin[t]);
1033  }
1034 
1035  lt.init();
1036 
1037  // do not go past end
1038  length = std::min(total_length, length);
1039 
1040  int source;
1041 
1042 #if STXXL_DEBUG_ASSERTIONS
1043  DiffType i = 0;
1044 #endif
1045 
1046  RandomAccessIterator3 target_end = target + length;
1047  while (target < target_end)
1048  {
1049  // take out
1050  source = lt.get_min_source();
1051 
1052 #if STXXL_DEBUG_ASSERTIONS
1053  assert(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
1054 #endif
1055 
1056  *target = *seqs_begin[source].first;
1057  ++seqs_begin[source].first;
1058  ++target;
1059 
1060 #if STXXL_DEBUG_ASSERTIONS
1061  assert((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1));
1062  ++i;
1063 #endif
1064  // feed
1065  // replace from same source
1066  lt.delete_min_insert(*seqs_begin[source].first);
1067  }
1068 
1069  return target;
1070 }
1071 
1072 template <bool Stable, class ValueType, class Comparator>
1074 {
1075 public:
1077 };
1078 
1079 #define STXXL_NO_POINTER(T) \
1080  template <bool Stable, class Comparator> \
1081  struct loser_tree_traits<Stable, T, Comparator> \
1082  { \
1083  typedef LoserTreeCopy<Stable, T, Comparator> LT; \
1084  };
1085 
1086 STXXL_NO_POINTER(unsigned char)
1088 STXXL_NO_POINTER(unsigned short)
1090 STXXL_NO_POINTER(unsigned int)
1092 STXXL_NO_POINTER(unsigned long)
1094 STXXL_NO_POINTER(unsigned long long)
1096 
1097 #undef STXXL_NO_POINTER
1098 
1099 template <bool Stable, class ValueType, class Comparator>
1101 {
1102 public:
1104 };
1105 
1106 #define STXXL_NO_POINTER_UNGUARDED(T) \
1107  template <bool Stable, class Comparator> \
1108  struct loser_tree_traits_unguarded<Stable, T, Comparator> \
1109  { \
1110  typedef LoserTreeCopyUnguarded<Stable, T, Comparator> LT; \
1111  };
1112 
1121 STXXL_NO_POINTER_UNGUARDED(unsigned long long)
1123 
1124 #undef STXXL_NO_POINTER_UNGUARDED
1125 
1126 template <bool Stable,
1127  typename RandomAccessIteratorIterator,
1128  typename RandomAccessIterator3,
1129  typename DiffType, typename Comparator>
1130 RandomAccessIterator3
1132  RandomAccessIteratorIterator seqs_begin,
1133  RandomAccessIteratorIterator seqs_end,
1134  RandomAccessIterator3 target, DiffType length,
1135  Comparator comp)
1136 {
1137  STXXL_PARALLEL_PCALL(length);
1138 
1139  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1140  ::value_type::first_type RandomAccessIterator;
1141  typedef typename std::iterator_traits<RandomAccessIterator>
1142  ::value_type value_type;
1143 
1144  int min_seq;
1145  RandomAccessIterator3 target_end;
1146  DiffType overhang = prepare_unguarded<Stable>(seqs_begin, seqs_end, comp, min_seq);
1147 
1148  DiffType total_length = 0;
1149  for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1150  total_length += iterpair_size(*s);
1151 
1152  if (overhang != (DiffType)(-1))
1153  {
1154  DiffType unguarded_length = std::min(length, total_length - overhang);
1157  (seqs_begin, seqs_end, target, unguarded_length, comp);
1158  overhang = length - unguarded_length;
1159  }
1160  else
1161  {
1162  // empty sequence found
1163  overhang = length;
1164  target_end = target;
1165  }
1166 
1167  STXXL_DEBUG_ASSERT(target_end == target + length - overhang);
1168  STXXL_DEBUG_ASSERT(stxxl::is_sorted(target, target_end, comp));
1169 
1170  target_end = multiway_merge_loser_tree
1172  (seqs_begin, seqs_end, target_end, overhang, comp);
1173 
1174  STXXL_DEBUG_ASSERT(target_end == target + length);
1175  STXXL_DEBUG_ASSERT(stxxl::is_sorted(target, target_end, comp));
1176 
1177  return target_end;
1178 }
1179 
1180 template <bool Stable,
1181  typename RandomAccessIteratorIterator,
1182  typename RandomAccessIterator3,
1183  typename DiffType, typename Comparator>
1184 RandomAccessIterator3
1186  RandomAccessIteratorIterator seqs_begin,
1187  RandomAccessIteratorIterator seqs_end,
1188  RandomAccessIterator3 target, DiffType length,
1189  Comparator comp)
1190 {
1191  STXXL_PARALLEL_PCALL(length);
1192 
1193  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1194  ::value_type::first_type RandomAccessIterator;
1195  typedef typename std::iterator_traits<RandomAccessIterator>
1196  ::value_type value_type;
1197 
1198  // move end of sequences to include the sentinel for merging
1199  for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1200  ++(*s).second;
1201 
1202  RandomAccessIterator3 target_end
1205  (seqs_begin, seqs_end, target, length, comp);
1206 
1207  STXXL_DEBUG_ASSERT(target_end == target + length);
1208  STXXL_DEBUG_ASSERT(stxxl::is_sorted(target, target_end, comp));
1209 
1210  // restore end of sequences
1211  for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1212  --(*s).second;
1213 
1214  return target_end;
1215 }
1216 
1217 /*!
1218  * Sequential multi-way merging switch.
1219  *
1220  * The decision if based on the branching factor and runtime settings.
1221  *
1222  * \param seqs_begin Begin iterator of iterator pair input sequence.
1223  * \param seqs_end End iterator of iterator pair input sequence.
1224  * \param target Begin iterator out output sequence.
1225  * \param length Maximum length to merge.
1226  * \param comp Comparator.
1227  * \tparam Stable Stable merging incurs a performance penalty.
1228  * \tparam Sentinels The sequences have a sentinel element.
1229  * \return End iterator of output sequence.
1230  */
1231 template <bool Stable, bool Sentinels,
1232  typename RandomAccessIteratorIterator,
1233  typename RandomAccessIterator3,
1234  typename DiffType, typename Comparator>
1235 RandomAccessIterator3
1236 sequential_multiway_merge(RandomAccessIteratorIterator seqs_begin,
1237  RandomAccessIteratorIterator seqs_end,
1238  RandomAccessIterator3 target, DiffType length,
1239  Comparator comp)
1240 {
1241  STXXL_PARALLEL_PCALL(length);
1242 
1243  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1244  ::value_type::first_type RandomAccessIterator;
1245  typedef typename std::iterator_traits<RandomAccessIterator>
1246  ::value_type value_type;
1247 
1248  for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1249  STXXL_DEBUG_ASSERT(stxxl::is_sorted((*s).first, (*s).second, comp));
1250 
1251  RandomAccessIterator3 return_target = target;
1252  int k = static_cast<int>(seqs_end - seqs_begin);
1253 
1254  SETTINGS::MultiwayMergeAlgorithm mwma = SETTINGS::multiway_merge_algorithm;
1255 
1256  if (!Sentinels && mwma == SETTINGS::LOSER_TREE_SENTINEL)
1257  mwma = SETTINGS::LOSER_TREE_COMBINED;
1258 
1259  switch (k)
1260  {
1261  case 0:
1262  break;
1263  case 1:
1264  return_target = std::copy(seqs_begin[0].first,
1265  seqs_begin[0].first + length,
1266  target);
1267  seqs_begin[0].first += length;
1268  break;
1269  case 2:
1270  return_target = merge_advance(
1271  seqs_begin[0].first, seqs_begin[0].second,
1272  seqs_begin[1].first, seqs_begin[1].second,
1273  target, length, comp);
1274  break;
1275  case 3:
1276  switch (mwma)
1277  {
1278  case SETTINGS::LOSER_TREE_COMBINED:
1279  return_target = multiway_merge_3_combined(
1280  seqs_begin, seqs_end, target, length, comp);
1281  break;
1282  case SETTINGS::LOSER_TREE_SENTINEL:
1283  return_target = multiway_merge_3_variant<unguarded_iterator>(
1284  seqs_begin, seqs_end, target, length, comp);
1285  break;
1286  default:
1287  return_target = multiway_merge_3_variant<guarded_iterator>(
1288  seqs_begin, seqs_end, target, length, comp);
1289  break;
1290  }
1291  break;
1292  case 4:
1293  switch (mwma)
1294  {
1295  case SETTINGS::LOSER_TREE_COMBINED:
1296  return_target = multiway_merge_4_combined(
1297  seqs_begin, seqs_end, target, length, comp);
1298  break;
1299  case SETTINGS::LOSER_TREE_SENTINEL:
1300  return_target = multiway_merge_4_variant<unguarded_iterator>(
1301  seqs_begin, seqs_end, target, length, comp);
1302  break;
1303  default:
1304  return_target = multiway_merge_4_variant<guarded_iterator>(
1305  seqs_begin, seqs_end, target, length, comp);
1306  break;
1307  }
1308  break;
1309  default:
1310  {
1311  switch (mwma)
1312  {
1313  case SETTINGS::BUBBLE:
1314  return_target = multiway_merge_bubble<Stable>(
1315  seqs_begin, seqs_end, target, length, comp);
1316  break;
1317  case SETTINGS::LOSER_TREE:
1318  return_target = multiway_merge_loser_tree<
1320  seqs_begin, seqs_end, target, length, comp);
1321  break;
1322  case SETTINGS::LOSER_TREE_COMBINED:
1323  return_target = multiway_merge_loser_tree_combined<Stable>(
1324  seqs_begin, seqs_end, target, length, comp);
1325  break;
1326  case SETTINGS::LOSER_TREE_SENTINEL:
1327  return_target = multiway_merge_loser_tree_sentinel<Stable>(
1328  seqs_begin, seqs_end, target, length, comp);
1329  break;
1330  default:
1331  assert(0 && "multiway_merge algorithm not implemented");
1332  break;
1333  }
1334  }
1335  }
1336 
1337  STXXL_DEBUG_ASSERT(stxxl::is_sorted(target, target + length, comp));
1338 
1339  return return_target;
1340 }
1341 
1342 /*!
1343  * Splitting method for parallel multi-way merge routine: use sampling and
1344  * binary search for in-exact splitting.
1345  *
1346  * \param seqs_begin Begin iterator of iterator pair input sequence.
1347  * \param seqs_end End iterator of iterator pair input sequence.
1348  * \param length Maximum length to merge.
1349  * \param total_length Total length of all sequences combined.
1350  * \param comp Comparator.
1351  * \param chunks Output subsequences for num_threads.
1352  * \param num_threads Split the sequences into for num_threads.
1353  * \tparam Stable Stable merging incurs a performance penalty.
1354  * \return End iterator of output sequence.
1355  */
1356 template <bool Stable,
1357  typename RandomAccessIteratorIterator,
1358  typename DiffType,
1359  typename Comparator>
1360 void
1362  const RandomAccessIteratorIterator& seqs_begin,
1363  const RandomAccessIteratorIterator& seqs_end,
1364  DiffType length, DiffType total_length, Comparator comp,
1365  std::vector<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type>* chunks,
1366  const thread_index_t num_threads)
1367 {
1368  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1369  ::value_type::first_type RandomAccessIterator;
1370  typedef typename std::iterator_traits<RandomAccessIterator>
1371  ::value_type value_type;
1372 
1373  const DiffType num_seqs = seqs_end - seqs_begin;
1374  const DiffType num_samples = num_threads * SETTINGS::merge_oversampling;
1375 
1376  // pick samples
1377  value_type* samples = new value_type[num_seqs * num_samples];
1378 
1379  for (DiffType s = 0; s < num_seqs; ++s)
1380  {
1381  for (DiffType i = 0; i < num_samples; ++i)
1382  {
1383  DiffType sample_index = static_cast<DiffType>(
1384  double(iterpair_size(seqs_begin[s]))
1385  * (double(i + 1) / double(num_samples + 1))
1386  * (double(length) / double(total_length))
1387  );
1388  samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
1389  }
1390  }
1391 
1392  if (Stable)
1393  std::stable_sort(samples, samples + (num_samples * num_seqs), comp);
1394  else
1395  std::sort(samples, samples + (num_samples * num_seqs), comp);
1396 
1397  // for each processor
1398  for (thread_index_t slab = 0; slab < num_threads; ++slab)
1399  {
1400  // for each sequence
1401  for (DiffType seq = 0; seq < num_seqs; ++seq)
1402  {
1403  if (slab > 0) {
1404  chunks[slab][seq].first =
1405  std::upper_bound(
1406  seqs_begin[seq].first, seqs_begin[seq].second,
1407  samples[num_samples * num_seqs * slab / num_threads],
1408  comp);
1409  }
1410  else // absolute beginning
1411  chunks[slab][seq].first = seqs_begin[seq].first;
1412 
1413  if ((slab + 1) < num_threads) {
1414  chunks[slab][seq].second =
1415  std::upper_bound(
1416  seqs_begin[seq].first, seqs_begin[seq].second,
1417  samples[num_samples * num_seqs * (slab + 1) / num_threads],
1418  comp);
1419  }
1420  else // absolute ending
1421  chunks[slab][seq].second = seqs_begin[seq].second;
1422  }
1423  }
1424 
1425  delete[] samples;
1426 }
1427 
1428 /*!
1429  * Splitting method for parallel multi-way merge routine: use multisequence
1430  * selection for exact splitting.
1431  *
1432  * \param seqs_begin Begin iterator of iterator pair input sequence.
1433  * \param seqs_end End iterator of iterator pair input sequence.
1434  * \param length Maximum length to merge.
1435  * \param total_length Total length of all sequences combined.
1436  * \param comp Comparator.
1437  * \param chunks Output subsequences for num_threads.
1438  * \param num_threads Split the sequences into for num_threads.
1439  * \tparam Stable Stable merging incurs a performance penalty.
1440  * \return End iterator of output sequence.
1441  */
1442 template <bool Stable,
1443  typename RandomAccessIteratorIterator,
1444  typename DiffType,
1445  typename Comparator>
1446 void
1448  const RandomAccessIteratorIterator& seqs_begin,
1449  const RandomAccessIteratorIterator& seqs_end,
1450  DiffType length, DiffType total_length, Comparator comp,
1451  std::vector<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type>* chunks,
1452  const thread_index_t num_threads)
1453 {
1454  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1455  ::value_type RandomAccessIteratorPair;
1456  typedef typename RandomAccessIteratorPair
1457  ::first_type RandomAccessIterator;
1458 
1459  const size_t num_seqs = seqs_end - seqs_begin;
1460  const bool tight = (total_length == length);
1461 
1462  std::vector<RandomAccessIterator>* offsets
1463  = new std::vector<RandomAccessIterator>[num_threads];
1464 
1465  std::vector<DiffType> ranks(num_threads + 1);
1466  equally_split(length, num_threads, ranks.begin());
1467 
1468  for (thread_index_t s = 0; s < (num_threads - 1); ++s)
1469  {
1470  offsets[s].resize(num_seqs);
1471  multiseq_partition(seqs_begin, seqs_end,
1472  ranks[s + 1], offsets[s].begin(), comp);
1473 
1474  if (!tight) // last one also needed and available
1475  {
1476  offsets[num_threads - 1].resize(num_seqs);
1477  multiseq_partition(seqs_begin, seqs_end,
1478  length, offsets[num_threads - 1].begin(), comp);
1479  }
1480  }
1481 
1482  // for each processor
1483  for (thread_index_t slab = 0; slab < num_threads; ++slab)
1484  {
1485  // for each sequence
1486  for (size_t s = 0; s < num_seqs; ++s)
1487  {
1488  if (slab == 0) // absolute beginning
1489  chunks[slab][s].first = seqs_begin[s].first;
1490  else
1491  chunks[slab][s].first = offsets[slab - 1][s];
1492 
1493  if (!tight || slab < (num_threads - 1))
1494  chunks[slab][s].second = offsets[slab][s];
1495  else // slab == num_threads - 1
1496  chunks[slab][s].second = seqs_begin[s].second;
1497  }
1498  }
1499 
1500  delete[] offsets;
1501 }
1502 
1503 #if STXXL_PARALLEL
1504 
1505 /*!
1506  * Parallel multi-way merge routine.
1507  *
1508  * The decision if based on the branching factor and runtime settings.
1509  *
1510  * \param seqs_begin Begin iterator of iterator pair input sequence.
1511  * \param seqs_end End iterator of iterator pair input sequence.
1512  * \param target Begin iterator out output sequence.
1513  * \param length Maximum length to merge.
1514  * \param comp Comparator.
1515  * \tparam Stable Stable merging incurs a performance penalty.
1516  * \return End iterator of output sequence.
1517  */
1518 template <bool Stable,
1519  typename RandomAccessIteratorIterator,
1520  typename RandomAccessIterator3,
1521  typename DiffType,
1522  typename Comparator>
1523 RandomAccessIterator3
1524 parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
1525  RandomAccessIteratorIterator seqs_end,
1526  RandomAccessIterator3 target, const DiffType length,
1527  Comparator comp)
1528 {
1529  STXXL_PARALLEL_PCALL(length);
1530 
1531  typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1532  ::value_type RandomAccessIteratorPair;
1533 
1534  for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; ++rii)
1535  STXXL_DEBUG_ASSERT(stxxl::is_sorted((*rii).first, (*rii).second, comp));
1536 
1537  // leave only non-empty sequences
1538  std::vector<RandomAccessIteratorPair> seqs_ne;
1539  seqs_ne.reserve(seqs_end - seqs_begin);
1540  DiffType total_length = 0;
1541 
1542  for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; ++raii)
1543  {
1544  DiffType length = iterpair_size(*raii);
1545  if (length > 0) {
1546  total_length += length;
1547  seqs_ne.push_back(*raii);
1548  }
1549  }
1550 
1551  size_t num_seqs = seqs_ne.size();
1552 
1553  STXXL_PARALLEL_PCALL(total_length);
1554 
1555  if (total_length == 0 || num_seqs == 0)
1556  return target;
1557 
1558  thread_index_t num_threads = static_cast<thread_index_t>(
1559  std::min(static_cast<DiffType>(SETTINGS::num_threads), total_length));
1560 
1561  Timing<inactive_tag>* t = new Timing<inactive_tag>[num_threads];
1562 
1563  for (int pr = 0; pr < num_threads; ++pr)
1564  t[pr].tic();
1565 
1566  // thread t will have to merge chunks[iam][0..k - 1]
1567 
1568  std::vector<RandomAccessIteratorPair>* chunks
1569  = new std::vector<RandomAccessIteratorPair>[num_threads];
1570 
1571  for (int s = 0; s < num_threads; ++s)
1572  chunks[s].resize(num_seqs);
1573 
1574 #pragma omp parallel num_threads(num_threads)
1575  {
1576 #pragma omp single
1577  {
1578  if (SETTINGS::multiway_merge_splitting == SETTINGS::SAMPLING)
1579  {
1580  parallel_multiway_merge_sampling_splitting<Stable>(
1581  seqs_ne.begin(), seqs_ne.end(),
1582  length, total_length, comp,
1583  chunks, num_threads);
1584  }
1585  else // (SETTINGS::multiway_merge_splitting == SETTINGS::EXACT)
1586  {
1587  parallel_multiway_merge_exact_splitting<Stable>(
1588  seqs_ne.begin(), seqs_ne.end(),
1589  length, total_length, comp,
1590  chunks, num_threads);
1591  }
1592  }
1593 
1594  thread_index_t iam = omp_get_thread_num();
1595  t[iam].tic();
1596 
1597  DiffType target_position = 0, local_length = 0;
1598 
1599  for (size_t s = 0; s < num_seqs; ++s)
1600  {
1601  target_position += chunks[iam][s].first - seqs_ne[s].first;
1602  local_length += iterpair_size(chunks[iam][s]);
1603  }
1604 
1605  sequential_multiway_merge<Stable, false>(
1606  chunks[iam].begin(), chunks[iam].end(),
1607  target + target_position,
1608  std::min(local_length, length - target_position),
1609  comp);
1610 
1611  t[iam].tic();
1612  }
1613 
1614  for (int pr = 0; pr < num_threads; ++pr)
1615  t[pr].tic();
1616 
1617  STXXL_DEBUG_ASSERT(stxxl::is_sorted(target, target + length, comp));
1618 
1619  //update ends of sequences
1620  size_t count_seqs = 0;
1621  for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; ++raii)
1622  {
1623  DiffType length = iterpair_size(*raii);
1624  if (length > 0)
1625  raii->first = chunks[num_threads - 1][count_seqs++].second;
1626  }
1627  STXXL_DEBUG_ASSERT(count_seqs == num_seqs);
1628 
1629  delete[] chunks;
1630 
1631  for (int pr = 0; pr < num_threads; ++pr)
1632  t[pr].tic();
1633  for (int pr = 0; pr < num_threads; ++pr)
1634  t[pr].print();
1635  delete[] t;
1636 
1637  return target + length;
1638 }
1639 
1640 /*!
1641  * Multi-way merging front-end with unstable mode and without sentinels.
1642  *
1643  * \param seqs_begin Begin iterator of iterator pair input sequence.
1644  * \param seqs_end End iterator of iterator pair input sequence.
1645  * \param target Begin iterator out output sequence.
1646  * \param comp Comparator.
1647  * \param length Maximum length to merge.
1648  * \return End iterator of output sequence.
1649  */
1650 template <typename RandomAccessIteratorPairIterator,
1651  typename RandomAccessIterator3,
1652  typename DiffType, typename Comparator>
1653 RandomAccessIterator3
1654 multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
1655  RandomAccessIteratorPairIterator seqs_end,
1656  RandomAccessIterator3 target, DiffType length,
1657  Comparator comp)
1658 {
1659  STXXL_PARALLEL_PCALL(seqs_end - seqs_begin);
1660 
1661  if (seqs_begin == seqs_end)
1662  return target;
1663 
1664  RandomAccessIterator3 target_end;
1666  ((seqs_end - seqs_begin) >= SETTINGS::multiway_merge_minimal_k) &&
1667  ((sequence_index_t)length >= SETTINGS::multiway_merge_minimal_n)
1668  ))
1669  target_end = parallel_multiway_merge<false>(
1670  seqs_begin, seqs_end, target, length, comp);
1671  else
1672  target_end = sequential_multiway_merge<false, false>(
1673  seqs_begin, seqs_end, target, length, comp);
1674 
1675  return target_end;
1676 }
1677 
1678 /*!
1679  * Multi-way merging front-end with unstable mode and without sentinels.
1680  *
1681  * \param seqs_begin Begin iterator of iterator pair input sequence.
1682  * \param seqs_end End iterator of iterator pair input sequence.
1683  * \param target Begin iterator out output sequence.
1684  * \param comp Comparator.
1685  * \param length Maximum length to merge.
1686  * \return End iterator of output sequence.
1687  */
1688 template <typename RandomAccessIteratorPairIterator,
1689  typename RandomAccessIterator3,
1690  typename DiffType, typename Comparator>
1691 RandomAccessIterator3
1692 multiway_merge_stable(RandomAccessIteratorPairIterator seqs_begin,
1693  RandomAccessIteratorPairIterator seqs_end,
1694  RandomAccessIterator3 target, DiffType length,
1695  Comparator comp)
1696 {
1697  STXXL_PARALLEL_PCALL(seqs_end - seqs_begin);
1698 
1699  if (seqs_begin == seqs_end)
1700  return target;
1701 
1702  RandomAccessIterator3 target_end;
1704  ((seqs_end - seqs_begin) >= SETTINGS::multiway_merge_minimal_k) &&
1705  ((sequence_index_t)length >= SETTINGS::multiway_merge_minimal_n)
1706  ))
1707  target_end = parallel_multiway_merge<true>(
1708  seqs_begin, seqs_end, target, length, comp);
1709  else
1710  target_end = sequential_multiway_merge<true, false>(
1711  seqs_begin, seqs_end, target, length, comp);
1712 
1713  return target_end;
1714 }
1715 
1716 /*!
1717  * Multi-way merging front-end with unstable mode and sentinels.
1718  *
1719  * Each sequence must be suffixed with a sentinel as *end(), one item beyond
1720  * the end of each sequence.
1721  *
1722  * \param seqs_begin Begin iterator of iterator pair input sequence.
1723  * \param seqs_end End iterator of iterator pair input sequence.
1724  * \param target Begin iterator out output sequence.
1725  * \param comp Comparator.
1726  * \param length Maximum length to merge.
1727  * \return End iterator of output sequence.
1728  * \pre For each \c i, \c seqs_begin[i].second must be the end marker of the
1729  * sequence, but also reference the one more sentinel element.
1730  */
1731 template <typename RandomAccessIteratorPairIterator,
1732  typename RandomAccessIterator3,
1733  typename DiffType, typename Comparator>
1734 RandomAccessIterator3
1735 multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin,
1736  RandomAccessIteratorPairIterator seqs_end,
1737  RandomAccessIterator3 target, DiffType length,
1738  Comparator comp)
1739 {
1740  if (seqs_begin == seqs_end)
1741  return target;
1742 
1743  STXXL_PARALLEL_PCALL(seqs_end - seqs_begin);
1744 
1746  ((seqs_end - seqs_begin) >= SETTINGS::multiway_merge_minimal_k) &&
1747  ((sequence_index_t)length >= SETTINGS::multiway_merge_minimal_n)
1748  ))
1749  return parallel_multiway_merge<false>(
1750  seqs_begin, seqs_end, target, length, comp);
1751  else
1752  return sequential_multiway_merge<false, true>(
1753  seqs_begin, seqs_end, target, length, comp);
1754 }
1755 
1756 /*!
1757  * Multi-way merging front-end with unstable mode and sentinels.
1758  *
1759  * Each sequence must be suffixed with a sentinel as *end(), one item beyond
1760  * the end of each sequence.
1761  *
1762  * \param seqs_begin Begin iterator of iterator pair input sequence.
1763  * \param seqs_end End iterator of iterator pair input sequence.
1764  * \param target Begin iterator out output sequence.
1765  * \param comp Comparator.
1766  * \param length Maximum length to merge.
1767  * \return End iterator of output sequence.
1768  * \pre For each \c i, \c seqs_begin[i].second must be the end marker of the
1769  * sequence, but also reference the one more sentinel element.
1770  */
1771 template <typename RandomAccessIteratorPairIterator,
1772  typename RandomAccessIterator3,
1773  typename DiffType, typename Comparator>
1774 RandomAccessIterator3
1775 multiway_merge_stable_sentinels(RandomAccessIteratorPairIterator seqs_begin,
1776  RandomAccessIteratorPairIterator seqs_end,
1777  RandomAccessIterator3 target, DiffType length,
1778  Comparator comp)
1779 {
1780  if (seqs_begin == seqs_end)
1781  return target;
1782 
1783  STXXL_PARALLEL_PCALL(seqs_end - seqs_begin);
1784 
1786  ((seqs_end - seqs_begin) >= SETTINGS::multiway_merge_minimal_k) &&
1787  ((sequence_index_t)length >= SETTINGS::multiway_merge_minimal_n)
1788  ))
1789  return parallel_multiway_merge<true>(
1790  seqs_begin, seqs_end, target, length, comp);
1791  else
1792  return sequential_multiway_merge<true, true>(
1793  seqs_begin, seqs_end, target, length, comp);
1794 }
1795 
1796 #endif // STXXL_PARALLEL
1797 
1798 } // namespace parallel
1799 
1801 
1802 #endif // !STXXL_PARALLEL_MULTIWAY_MERGE_HEADER
#define STXXL_ASSERT(condition)
Definition: verbose.h:220
RandomAccessIterator3 multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
RandomAccessIterator3 multiway_merge_stable_sentinels(RandomAccessIteratorPairIterator seqs_begin, RandomAccessIteratorPairIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Multi-way merging front-end.
Definition: parallel.h:221
int thread_index_t
Definition: types.h:37
void multiseq_partition(const RanSeqs &begin_seqs, const RanSeqs &end_seqs, const RankType &rank, RankIterator begin_offsets, Comparator comp=std::less< typename std::iterator_traits< typename std::iterator_traits< RanSeqs >::value_type::first_type >::value_type >())
Splits several sorted sequences at a certain global rank, resulting in a splitting point for each seq...
RandomAccessIterator3 multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
unguarded_iterator< RandomAccessIterator, Comparator > self_type
Our own type.
RandomAccessIterator3 multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_type M)
Sort records comparison-based, see stxxl::sort -- Sorting Comparison-Based.
Definition: sort.h:676
std::iterator_traits< RandomAccessIterator >::value_type value_type
Value type of the iterator.
Comparator & comp
Comparator.
Comparator & comp
Comparator.
LoserTreePointerUnguarded< Stable, ValueType, Comparator > LT
guarded_iterator< RandomAccessIterator, Comparator > self_type
Our own type.
guarded_iterator(RandomAccessIterator begin, RandomAccessIterator end, Comparator &comp)
Constructor.
RandomAccessIterator3 multiway_merge(RandomAccessIteratorPairIterator seqs_begin, RandomAccessIteratorPairIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Multi-way merging dispatcher.
Definition: parallel.h:144
static uint_pair min()
return an uint_pair instance containing the smallest value possible
Definition: uint_types.h:234
uint_pair & operator++()
prefix increment operator (directly manipulates the integer parts)
Definition: uint_types.h:163
#define STXXL_DEBUG_ASSERT(condition)
Definition: verbose.h:250
RandomAccessIterator end
End iterator of the sequence.
#define STXXL_CHECK_EQUAL(a, b)
STXXL_CHECK_EQUAL(a,b) is an assertion macro for unit tests, similar to STXXL_CHECK(a==b). The difference is that STXXL_CHECK_EQUAL(a,b) also prints the values of a and b. Attention: a and b must be printable with std::cout!
Definition: verbose.h:192
void parallel_multiway_merge_exact_splitting(const RandomAccessIteratorIterator &seqs_begin, const RandomAccessIteratorIterator &seqs_end, DiffType length, DiffType total_length, Comparator comp, std::vector< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type > *chunks, const thread_index_t num_threads)
Splitting method for parallel multi-way merge routine: use multisequence selection for exact splittin...
DiffTypeOutputIterator equally_split(DiffType n, thread_index_t p, DiffTypeOutputIterator s)
Split a sequence into parts of almost equal size.
Definition: equally_split.h:41
void parallel_multiway_merge_sampling_splitting(const RandomAccessIteratorIterator &seqs_begin, const RandomAccessIteratorIterator &seqs_end, DiffType length, DiffType total_length, Comparator comp, std::vector< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type > *chunks, const thread_index_t num_threads)
Splitting method for parallel multi-way merge routine: use sampling and binary search for in-exact sp...
RandomAccessIterator3 sequential_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Sequential multi-way merging switch.
RandomAccessIterator3 multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Multi-way merging procedure for a high branching factor, unguarded case.
#define STXXL_PARALLEL_CONDITION(c)
Definition: settings.h:36
RandomAccessIterator3 multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin, RandomAccessIteratorPairIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Multi-way merging front-end.
Definition: parallel.h:195
bool operator<=(const uint_pair &b) const
less-or-equal comparison operator
Definition: uint_types.h:210
#define STXXL_MERGE3CASE(a, b, c, c0, c1)
#define UNLIKELY(c)
Definition: utils.h:225
static std::vector< std::string > split(const std::string &str, const std::string &sep, unsigned int min_fields=0, unsigned int limit_fields=std::numeric_limits< unsigned int >::max())
Split a string by given separator string. Returns a vector of strings with at least min_fields and at...
Definition: utils.h:56
uint64 sequence_index_t
Definition: types.h:32
RandomAccessIterator3 multiway_merge_stable(RandomAccessIteratorPairIterator seqs_begin, RandomAccessIteratorPairIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Multi-way merging dispatcher.
Definition: parallel.h:169
RandomAccessIterator3 multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Multi-way merging procedure for a high branching factor, guarded case.
OutputIterator merge_advance(RandomAccessIterator1 &begin1, RandomAccessIterator1 end1, RandomAccessIterator2 &begin2, RandomAccessIterator2 end2, OutputIterator target, DiffType max_length, Comparator comp)
Merge routine being able to merge only the max_length smallest elements.
Definition: merge.h:161
bool operator<(const uint_pair &b) const
less-than comparison operator
Definition: uint_types.h:204
RandomAccessIterator & iterator()
Convert to wrapped iterator.
#define STXXL_BEGIN_NAMESPACE
Definition: namespace.h:16
#define POS(i)
#define STXXL_MERGE4CASE(a, b, c, d, c0, c1, c2)
#define STOPS(i)
bool is_sorted(ForwardIterator first, ForwardIterator last, StrictWeakOrdering comp)
Definition: is_sorted.h:24
std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, Comparator comp)
Prepare a set of sequences to be merged with a (end) guard (sentinel)
#define STXXL_NO_POINTER_UNGUARDED(T)
RandomAccessIterator3 multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Highly efficient 4-way merging procedure.
#define STXXL_PARALLEL_PCALL(n)
RandomAccessIterator3 multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Highly efficient 3-way merging procedure.
RandomAccessIterator current
Current iterator position.
RandomAccessIterator3 multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
Basic multi-way merging procedure.
RandomAccessIterator & iterator()
Convert to wrapped iterator.
std::iterator_traits< RandomAccessIterator >::value_type value_type
Value type of the iterator.
Iterator wrapper supporting an implicit supremum at the end of the sequence, dominating all compariso...
LoserTreePointer< Stable, ValueType, Comparator > LT
#define STXXL_DECISION(a, b, c, d)
std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type prepare_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, Comparator comp, int &min_sequence)
Prepare a set of sequences to be merged without a (end) guard.
RandomAccessIterator3 multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, DiffType length, Comparator comp)
unguarded_iterator(RandomAccessIterator begin, RandomAccessIterator, Comparator &comp)
Constructor.
RandomAccessIterator current
Current iterator position.
#define STXXL_NO_POINTER(T)
#define STXXL_END_NAMESPACE
Definition: namespace.h:17
std::iterator_traits< typename RandomAccessIteratorPair::first_type >::difference_type iterpair_size(const RandomAccessIteratorPair &p)
Length of a sequence described by a pair of iterators.