17 #ifndef STXXL_PARALLEL_MULTIWAY_MERGESORT_HEADER
18 #define STXXL_PARALLEL_MULTIWAY_MERGESORT_HEADER
39 template <
typename DiffType>
53 template <
typename RandomAccessIterator>
54 struct PMWMSSortingData
56 typedef typename std::iterator_traits<RandomAccessIterator>::value_type
58 typedef typename std::iterator_traits<RandomAccessIterator>::difference_type
62 RandomAccessIterator source;
72 ValueType** temporaries;
73 #if STXXL_MULTIWAY_MERGESORT_COPY_LAST
75 RandomAccessIterator* sorting_places;
77 ValueType** merging_places;
80 ValueType** sorting_places;
82 RandomAccessIterator* merging_places;
89 std::vector<PMWMSPiece<DiffType> >* pieces;
93 template <
typename RandomAccessIterator>
101 PMWMSSortingData<RandomAccessIterator>* sd;
109 template <
typename RandomAccessIterator,
typename DiffType>
110 inline void determine_samples(PMWMSSorterPU<RandomAccessIterator>* d,
111 DiffType& num_samples)
113 PMWMSSortingData<RandomAccessIterator>* sd = d->sd;
115 num_samples = SETTINGS::sort_mwms_oversampling * d->num_threads - 1;
117 std::vector<DiffType> es(num_samples + 2);
121 for (DiffType i = 0; i < num_samples; i++)
122 sd->samples[d->iam * num_samples + i] = sd->source[sd->starts[d->iam] + es[i + 1]];
130 template <
bool Stable,
typename RandomAccessIterator,
typename Comparator>
131 inline void parallel_sort_mwms_pu(PMWMSSorterPU<RandomAccessIterator>* d,
134 typedef typename std::iterator_traits<RandomAccessIterator>::value_type
136 typedef typename std::iterator_traits<RandomAccessIterator>::difference_type
139 Timing<inactive_tag> t;
143 PMWMSSortingData<RandomAccessIterator>* sd = d->sd;
147 DiffType length_local = sd->starts[iam + 1] - sd->starts[iam];
149 #if STXXL_MULTIWAY_MERGESORT_COPY_LAST
150 typedef RandomAccessIterator SortingPlacesIterator;
152 sd->sorting_places[iam] = sd->source + sd->starts[iam];
154 typedef ValueType* SortingPlacesIterator;
156 sd->sorting_places[iam] = sd->temporaries[iam] =
static_cast<ValueType*
>(::operator
new (
sizeof(ValueType) * (length_local + 1)));
158 std::uninitialized_copy(sd->source + sd->starts[iam], sd->source + sd->starts[iam] + length_local, sd->sorting_places[iam]);
163 std::stable_sort(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp);
165 std::sort(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp);
173 if (SETTINGS::sort_splitting == SETTINGS::SAMPLING)
175 DiffType num_samples;
176 determine_samples(d, num_samples);
180 t.tic(
"sample/wait");
183 std::sort(sd->samples, sd->samples + (num_samples * d->num_threads), comp);
187 for (
int s = 0; s < d->num_threads; s++)
190 if (num_samples * iam > 0)
191 sd->pieces[iam][s].begin =
192 std::lower_bound(sd->sorting_places[s],
193 sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
194 sd->samples[num_samples * iam],
196 - sd->sorting_places[s];
199 sd->pieces[iam][s].begin = 0;
201 if ((num_samples * (iam + 1)) < (num_samples * d->num_threads))
202 sd->pieces[iam][s].end =
203 std::lower_bound(sd->sorting_places[s],
204 sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
205 sd->samples[num_samples * (iam + 1)],
207 - sd->sorting_places[s];
210 sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
213 else if (SETTINGS::sort_splitting == SETTINGS::EXACT)
219 std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > seqs(d->num_threads);
220 for (
int s = 0; s < d->num_threads; s++)
221 seqs[s] = std::make_pair(sd->sorting_places[s], sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]);
223 std::vector<SortingPlacesIterator> offsets(d->num_threads);
226 if (iam < d->num_threads - 1)
227 multiseq_partition(seqs.begin(), seqs.end(), sd->starts[iam + 1], offsets.begin(), comp);
229 for (
int seq = 0; seq < d->num_threads; seq++)
232 if (iam < (d->num_threads - 1))
233 sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
236 sd->pieces[iam][seq].end = sd->starts[seq + 1] - sd->starts[seq];
241 for (
int seq = 0; seq < d->num_threads; seq++)
245 sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
248 sd->pieces[iam][seq].begin = 0;
255 DiffType offset = 0, length_am = 0;
256 for (
int s = 0; s < d->num_threads; s++)
258 length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
259 offset += sd->pieces[iam][s].begin;
262 #if STXXL_MULTIWAY_MERGESORT_COPY_LAST
266 sd->merging_places[iam] = sd->temporaries[iam] =
new ValueType[length_am];
269 sd->merging_places[iam] = sd->source + offset;
271 std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > seqs(d->num_threads);
273 for (
int s = 0; s < d->num_threads; s++)
275 seqs[s] = std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin, sd->sorting_places[s] + sd->pieces[iam][s].end);
280 sequential_multiway_merge<Stable, false>(seqs.begin(), seqs.end(), sd->merging_places[iam], length_am, comp);
288 #if STXXL_MULTIWAY_MERGESORT_COPY_LAST
290 std::copy(sd->merging_places[iam], sd->merging_places[iam] + length_am, sd->source + offset);
293 delete sd->temporaries[iam];
308 template <
bool Stable,
309 typename RandomAccessIterator,
typename Comparator>
311 parallel_sort_mwms(RandomAccessIterator begin,
312 RandomAccessIterator end,
318 typedef typename std::iterator_traits<RandomAccessIterator>::value_type
320 typedef typename std::iterator_traits<RandomAccessIterator>::difference_type
323 DiffType n = end - begin;
331 PMWMSSortingData<RandomAccessIterator> sd;
334 sd.temporaries = new ValueType*[num_threads];
335 #if STXXL_MULTIWAY_MERGESORT_COPY_LAST
336 sd.sorting_places =
new RandomAccessIterator[num_threads];
337 sd.merging_places =
new ValueType*[num_threads];
339 sd.sorting_places =
new ValueType*[num_threads];
340 sd.merging_places =
new RandomAccessIterator[num_threads];
342 if (SETTINGS::sort_splitting == SETTINGS::SAMPLING)
343 sd.samples =
new ValueType[num_threads * (SETTINGS::sort_mwms_oversampling * num_threads - 1)];
346 sd.offsets =
new DiffType[num_threads - 1];
347 sd.pieces =
new std::vector<PMWMSPiece<DiffType> >[num_threads];
348 for (
int s = 0; s < num_threads; s++)
349 sd.pieces[s].resize(num_threads);
350 PMWMSSorterPU<RandomAccessIterator>* pus =
new PMWMSSorterPU<RandomAccessIterator>[num_threads];
351 DiffType* starts = sd.starts =
new DiffType[num_threads + 1];
353 DiffType chunk_length = n / num_threads,
split = n % num_threads, start = 0;
354 for (
int i = 0; i < num_threads; i++)
357 start += (i <
split) ? (chunk_length + 1) : chunk_length;
358 pus[i].num_threads = num_threads;
362 starts[num_threads] = start;
365 #pragma omp parallel num_threads(num_threads)
366 parallel_sort_mwms_pu<Stable>(&(pus[omp_get_thread_num()]), comp);
369 delete[] sd.temporaries;
370 delete[] sd.sorting_places;
371 delete[] sd.merging_places;
373 if (SETTINGS::sort_splitting == SETTINGS::SAMPLING)
384 #endif // STXXL_PARALLEL
388 #endif // !STXXL_PARALLEL_MULTIWAY_MERGESORT_HEADER
void multiseq_partition(const RanSeqs &begin_seqs, const RanSeqs &end_seqs, const RankType &rank, RankIterator begin_offsets, Comparator comp=std::less< typename std::iterator_traits< typename std::iterator_traits< RanSeqs >::value_type::first_type >::value_type >())
Splits several sorted sequences at a certain global rank, resulting in a splitting point for each seq...
void sort(ExtIterator first, ExtIterator last, StrictWeakOrdering cmp, unsigned_type M)
Sort records comparison-based, see stxxl::sort -- Sorting Comparison-Based.
#define STXXL_DEBUG_ASSERT(condition)
DiffTypeOutputIterator equally_split(DiffType n, thread_index_t p, DiffTypeOutputIterator s)
Split a sequence into parts of almost equal size.
static std::vector< std::string > split(const std::string &str, const std::string &sep, unsigned int min_fields=0, unsigned int limit_fields=std::numeric_limits< unsigned int >::max())
Split a string by given separator string. Returns a vector of strings with at least min_fields and at...
#define STXXL_BEGIN_NAMESPACE
bool is_sorted(ForwardIterator first, ForwardIterator last, StrictWeakOrdering comp)
#define STXXL_PARALLEL_PCALL(n)
#define STXXL_END_NAMESPACE