00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef STXXL_PQ_LOSERTREE_HEADER
00017 #define STXXL_PQ_LOSERTREE_HEADER
00018
00019 __STXXL_BEGIN_NAMESPACE
00020
00024
00027 namespace priority_queue_local
00028 {
00030
00035 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00036 class loser_tree : private noncopyable
00037 {
00038 public:
00039 typedef ValTp_ value_type;
00040 typedef Cmp_ comparator_type;
00041 typedef value_type Element;
00042
00043 private:
00044 #if STXXL_PQ_INTERNAL_LOSER_TREE
00045 struct Entry
00046 {
00047 value_type key;
00048 unsigned_type index;
00049 };
00050 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00051
00052 comparator_type cmp;
00053
00054 internal_bounded_stack<unsigned_type, KNKMAX> free_slots;
00055
00056 unsigned_type size_;
00057 unsigned_type logK;
00058 unsigned_type k;
00059
00060 Element sentinel;
00061
00062 #if STXXL_PQ_INTERNAL_LOSER_TREE
00063
00064
00065 Entry entry[KNKMAX];
00066 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00067
00068
00069
00070
00071 Element * current[KNKMAX];
00072 Element * current_end[KNKMAX];
00073 Element * segment[KNKMAX];
00074 unsigned_type segment_size[KNKMAX];
00075
00076 unsigned_type mem_cons_;
00077
00078
00079 unsigned_type initWinner(unsigned_type root);
00080 void update_on_insert(unsigned_type node, const Element & newKey, unsigned_type newIndex,
00081 Element * winnerKey, unsigned_type * winnerIndex, unsigned_type * mask);
00082 void deallocate_segment(unsigned_type slot);
00083 void doubleK();
00084 void compactTree();
00085 void rebuildLoserTree();
00086 bool is_segment_empty(unsigned_type slot);
00087 void multi_merge_k(Element * target, unsigned_type length);
00088
00089 #if STXXL_PQ_INTERNAL_LOSER_TREE
00090 template <unsigned LogK>
00091 void multi_merge_f(Element * target, unsigned_type length)
00092 {
00093
00094
00095
00096 Element * done = target + length;
00097 Entry * regEntry = entry;
00098 Element ** regStates = current;
00099 unsigned_type winnerIndex = regEntry[0].index;
00100 Element winnerKey = regEntry[0].key;
00101 Element * winnerPos;
00102
00103
00104 assert(logK >= LogK);
00105 while (target != done)
00106 {
00107 winnerPos = regStates[winnerIndex];
00108
00109
00110 *target = winnerKey;
00111
00112
00113 ++winnerPos;
00114 regStates[winnerIndex] = winnerPos;
00115 winnerKey = *winnerPos;
00116
00117
00118 if (is_sentinel(winnerKey))
00119 {
00120 deallocate_segment(winnerIndex);
00121 }
00122 ++target;
00123
00124
00125 #define TreeStep(L) \
00126 if (1 << LogK >= 1 << L) { \
00127 Entry * pos ## L = regEntry + ((winnerIndex + (1 << LogK)) >> (((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0)); \
00128 Element key ## L = pos ## L->key; \
00129 if (cmp(winnerKey, key ## L)) { \
00130 unsigned_type index ## L = pos ## L->index; \
00131 pos ## L->key = winnerKey; \
00132 pos ## L->index = winnerIndex; \
00133 winnerKey = key ## L; \
00134 winnerIndex = index ## L; \
00135 } \
00136 }
00137 TreeStep(10);
00138 TreeStep(9);
00139 TreeStep(8);
00140 TreeStep(7);
00141 TreeStep(6);
00142 TreeStep(5);
00143 TreeStep(4);
00144 TreeStep(3);
00145 TreeStep(2);
00146 TreeStep(1);
00147 #undef TreeStep
00148 }
00149 regEntry[0].index = winnerIndex;
00150 regEntry[0].key = winnerKey;
00151 }
00152 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00153
00154 public:
00155 bool is_sentinel(const Element & a)
00156 {
00157 return !(cmp(cmp.min_value(), a));
00158 }
00159 bool not_sentinel(const Element & a)
00160 {
00161 return cmp(cmp.min_value(), a);
00162 }
00163
00164 public:
00165 loser_tree();
00166 ~loser_tree();
00167 void init();
00168
00169 void swap(loser_tree & obj)
00170 {
00171 std::swap(cmp, obj.cmp);
00172 std::swap(free_slots, obj.free_slots);
00173 std::swap(size_, obj.size_);
00174 std::swap(logK, obj.logK);
00175 std::swap(k, obj.k);
00176 std::swap(sentinel, obj.sentinel);
00177 #if STXXL_PQ_INTERNAL_LOSER_TREE
00178 swap_1D_arrays(entry, obj.entry, KNKMAX);
00179 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00180 swap_1D_arrays(current, obj.current, KNKMAX);
00181 swap_1D_arrays(current_end, obj.current_end, KNKMAX);
00182 swap_1D_arrays(segment, obj.segment, KNKMAX);
00183 swap_1D_arrays(segment_size, obj.segment_size, KNKMAX);
00184 std::swap(mem_cons_, obj.mem_cons_);
00185 }
00186
00187 void multi_merge(Element * begin, Element * end)
00188 {
00189 multi_merge(begin, end - begin);
00190 }
00191 void multi_merge(Element *, unsigned_type length);
00192
00193 unsigned_type mem_cons() const { return mem_cons_; }
00194
00195 bool is_space_available() const
00196 {
00197 return (k < KNKMAX) || !free_slots.empty();
00198 }
00199
00200 void insert_segment(Element * target, unsigned_type length);
00201 unsigned_type size() { return size_; }
00202 };
00203
00205 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00206 loser_tree<ValTp_, Cmp_, KNKMAX>::loser_tree() : size_(0), logK(0), k(1), mem_cons_(0)
00207 {
00208 free_slots.push(0);
00209 segment[0] = NULL;
00210 current[0] = &sentinel;
00211 current_end[0] = &sentinel;
00212
00213
00214 init();
00215 }
00216
00217 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00218 void loser_tree<ValTp_, Cmp_, KNKMAX>::init()
00219 {
00220 assert(!cmp(cmp.min_value(), cmp.min_value()));
00221 sentinel = cmp.min_value();
00222 rebuildLoserTree();
00223 #if STXXL_PQ_INTERNAL_LOSER_TREE
00224 assert(current[entry[0].index] == &sentinel);
00225 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00226 }
00227
00228
00229
00230 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00231 void loser_tree<ValTp_, Cmp_, KNKMAX>::rebuildLoserTree()
00232 {
00233 #if STXXL_PQ_INTERNAL_LOSER_TREE
00234 assert(LOG2<KNKMAX>::floor == LOG2<KNKMAX>::ceil);
00235 unsigned_type winner = initWinner(1);
00236 entry[0].index = winner;
00237 entry[0].key = *(current[winner]);
00238 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00239 }
00240
00241
00242 #if STXXL_PQ_INTERNAL_LOSER_TREE
00243
00244
00245
00246
00247
00248 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00249 unsigned_type loser_tree<ValTp_, Cmp_, KNKMAX>::initWinner(unsigned_type root)
00250 {
00251 if (root >= k) {
00252 return root - k;
00253 } else {
00254 unsigned_type left = initWinner(2 * root);
00255 unsigned_type right = initWinner(2 * root + 1);
00256 Element lk = *(current[left]);
00257 Element rk = *(current[right]);
00258 if (!(cmp(lk, rk))) {
00259 entry[root].index = right;
00260 entry[root].key = rk;
00261 return left;
00262 } else {
00263 entry[root].index = left;
00264 entry[root].key = lk;
00265 return right;
00266 }
00267 }
00268 }
00269
00270
00271
00272
00273
00274
00275
00276 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00277 void loser_tree<ValTp_, Cmp_, KNKMAX>::update_on_insert(
00278 unsigned_type node,
00279 const Element & newKey,
00280 unsigned_type newIndex,
00281 Element * winnerKey,
00282 unsigned_type * winnerIndex,
00283 unsigned_type * mask)
00284 {
00285 if (node == 0) {
00286 *mask = 1 << (logK - 1);
00287 *winnerKey = entry[0].key;
00288 *winnerIndex = entry[0].index;
00289 if (cmp(entry[node].key, newKey))
00290 {
00291 entry[node].key = newKey;
00292 entry[node].index = newIndex;
00293 }
00294 } else {
00295 update_on_insert(node >> 1, newKey, newIndex, winnerKey, winnerIndex, mask);
00296 Element loserKey = entry[node].key;
00297 unsigned_type loserIndex = entry[node].index;
00298 if ((*winnerIndex & *mask) != (newIndex & *mask)) {
00299 if (cmp(loserKey, newKey)) {
00300 if (cmp(*winnerKey, newKey)) {
00301 entry[node].key = *winnerKey;
00302 entry[node].index = *winnerIndex;
00303 } else {
00304 entry[node].key = newKey;
00305 entry[node].index = newIndex;
00306 }
00307 }
00308 *winnerKey = loserKey;
00309 *winnerIndex = loserIndex;
00310 }
00311
00312
00313
00314
00315
00316
00317
00318 *mask >>= 1;
00319 }
00320 }
00321 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00322
00323
00324
00325 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00326 void loser_tree<ValTp_, Cmp_, KNKMAX>::doubleK()
00327 {
00328 STXXL_VERBOSE3("loser_tree::doubleK (before) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_slots.size());
00329 assert(k > 0);
00330 assert(k < KNKMAX);
00331 assert(free_slots.empty());
00332
00333
00334
00335 for (unsigned_type i = 2 * k - 1; i >= k; i--)
00336 {
00337 current[i] = &sentinel;
00338 current_end[i] = &sentinel;
00339 segment[i] = NULL;
00340 free_slots.push(i);
00341 }
00342
00343
00344 k *= 2;
00345 logK++;
00346
00347 STXXL_VERBOSE3("loser_tree::doubleK (after) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_slots.size());
00348 assert(!free_slots.empty());
00349
00350
00351 rebuildLoserTree();
00352 }
00353
00354
00355
00356 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00357 void loser_tree<ValTp_, Cmp_, KNKMAX>::compactTree()
00358 {
00359 STXXL_VERBOSE3("loser_tree::compactTree (before) k=" << k << " logK=" << logK << " #free=" << free_slots.size());
00360 assert(logK > 0);
00361
00362
00363 unsigned_type pos = 0;
00364 unsigned_type last_empty = 0;
00365 for ( ; pos < k; pos++)
00366 {
00367 if (not_sentinel(*(current[pos])))
00368 {
00369 segment_size[last_empty] = segment_size[pos];
00370 current[last_empty] = current[pos];
00371 current_end[last_empty] = current_end[pos];
00372 segment[last_empty] = segment[pos];
00373 last_empty++;
00374 }
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386 }
00387
00388
00389 while ((k > 1) && ((k / 2) >= last_empty))
00390 {
00391 k /= 2;
00392 logK--;
00393 }
00394
00395
00396 free_slots.clear();
00397 for ( ; last_empty < k; last_empty++)
00398 {
00399 current[last_empty] = &sentinel;
00400 current_end[last_empty] = &sentinel;
00401 free_slots.push(last_empty);
00402 }
00403
00404 STXXL_VERBOSE3("loser_tree::compactTree (after) k=" << k << " logK=" << logK << " #free=" << free_slots.size());
00405
00406
00407 rebuildLoserTree();
00408 }
00409
00410
00411
00412
00413 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00414 void loser_tree<ValTp_, Cmp_, KNKMAX>::insert_segment(Element * target, unsigned_type length)
00415 {
00416 STXXL_VERBOSE2("loser_tree::insert_segment(" << target << "," << length << ")");
00417
00418
00419 if (length > 0)
00420 {
00421 assert(not_sentinel(target[0]));
00422 assert(not_sentinel(target[length - 1]));
00423 assert(is_sentinel(target[length]));
00424
00425
00426 if (free_slots.empty())
00427 {
00428 doubleK();
00429 }
00430 assert(!free_slots.empty());
00431 unsigned_type index = free_slots.top();
00432 free_slots.pop();
00433
00434
00435
00436 current[index] = segment[index] = target;
00437 current_end[index] = target + length;
00438 segment_size[index] = (length + 1) * sizeof(value_type);
00439 mem_cons_ += (length + 1) * sizeof(value_type);
00440 size_ += length;
00441
00442 #if STXXL_PQ_INTERNAL_LOSER_TREE
00443
00444 Element dummyKey;
00445 unsigned_type dummyIndex;
00446 unsigned_type dummyMask;
00447 update_on_insert((index + k) >> 1, *target, index,
00448 &dummyKey, &dummyIndex, &dummyMask);
00449 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00450 } else {
00451
00452
00453
00454
00455 delete[] target;
00456 }
00457 }
00458
00459
00460 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00461 loser_tree<ValTp_, Cmp_, KNKMAX>::~loser_tree()
00462 {
00463 STXXL_VERBOSE1("loser_tree::~loser_tree()");
00464 for (unsigned_type i = 0; i < k; ++i)
00465 {
00466 if (segment[i])
00467 {
00468 STXXL_VERBOSE2("loser_tree::~loser_tree() deleting segment " << i);
00469 delete[] segment[i];
00470 mem_cons_ -= segment_size[i];
00471 }
00472 }
00473
00474 assert(mem_cons_ == 0);
00475 }
00476
00477
00478 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00479 void loser_tree<ValTp_, Cmp_, KNKMAX>::deallocate_segment(unsigned_type slot)
00480 {
00481
00482
00483 STXXL_VERBOSE2("loser_tree::deallocate_segment() deleting segment " <<
00484 slot << " address: " << segment[slot] << " size: " << (segment_size[slot] / sizeof(value_type)) - 1);
00485 current[slot] = &sentinel;
00486 current_end[slot] = &sentinel;
00487
00488
00489 delete[] segment[slot];
00490 segment[slot] = NULL;
00491 mem_cons_ -= segment_size[slot];
00492
00493
00494 free_slots.push(slot);
00495 }
00496
00497
00498
00499
00500
00501
00502
00503 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00504 void loser_tree<ValTp_, Cmp_, KNKMAX>::multi_merge(Element * target, unsigned_type length)
00505 {
00506 STXXL_VERBOSE3("loser_tree::multi_merge(target=" << target << ", len=" << length << ") k=" << k);
00507
00508 if (length == 0)
00509 return;
00510
00511 assert(k > 0);
00512 assert(length <= size_);
00513
00514
00515
00516 #if STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_INTERNAL
00517 priority_queue_local::invert_order<Cmp_, value_type, value_type> inv_cmp(cmp);
00518 #endif
00519 switch (logK) {
00520 case 0:
00521 assert(k == 1);
00522 #if STXXL_PQ_INTERNAL_LOSER_TREE
00523 assert(entry[0].index == 0);
00524 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00525 assert(free_slots.empty());
00526 memcpy(target, current[0], length * sizeof(Element));
00527
00528 current[0] += length;
00529 #if STXXL_PQ_INTERNAL_LOSER_TREE
00530 entry[0].key = **current;
00531 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00532 if (is_segment_empty(0))
00533 deallocate_segment(0);
00534
00535 break;
00536 case 1:
00537 assert(k == 2);
00538 #if STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_INTERNAL
00539 {
00540 std::pair<Element *, Element *> seqs[2] =
00541 {
00542 std::make_pair(current[0], current_end[0]),
00543 std::make_pair(current[1], current_end[1])
00544 };
00545 parallel::multiway_merge_sentinel(seqs, seqs + 2, target, inv_cmp, length);
00546 current[0] = seqs[0].first;
00547 current[1] = seqs[1].first;
00548 }
00549 #else
00550 merge_iterator(current[0], current[1], target, length, cmp);
00551 rebuildLoserTree();
00552 #endif
00553 if (is_segment_empty(0))
00554 deallocate_segment(0);
00555
00556 if (is_segment_empty(1))
00557 deallocate_segment(1);
00558
00559 break;
00560 case 2:
00561 assert(k == 4);
00562 #if STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_INTERNAL
00563 {
00564 std::pair<Element *, Element *> seqs[4] =
00565 {
00566 std::make_pair(current[0], current_end[0]),
00567 std::make_pair(current[1], current_end[1]),
00568 std::make_pair(current[2], current_end[2]),
00569 std::make_pair(current[3], current_end[3])
00570 };
00571 parallel::multiway_merge_sentinel(seqs, seqs + 4, target, inv_cmp, length);
00572 current[0] = seqs[0].first;
00573 current[1] = seqs[1].first;
00574 current[2] = seqs[2].first;
00575 current[3] = seqs[3].first;
00576 }
00577 #else
00578 if (is_segment_empty(3))
00579 merge3_iterator(current[0], current[1], current[2], target, length, cmp);
00580 else
00581 merge4_iterator(current[0], current[1], current[2], current[3], target, length, cmp);
00582
00583 rebuildLoserTree();
00584 #endif
00585 if (is_segment_empty(0))
00586 deallocate_segment(0);
00587
00588 if (is_segment_empty(1))
00589 deallocate_segment(1);
00590
00591 if (is_segment_empty(2))
00592 deallocate_segment(2);
00593
00594 if (is_segment_empty(3))
00595 deallocate_segment(3);
00596
00597 break;
00598 #if !(STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_INTERNAL)
00599 case 3: multi_merge_f<3>(target, length);
00600 break;
00601 case 4: multi_merge_f<4>(target, length);
00602 break;
00603 case 5: multi_merge_f<5>(target, length);
00604 break;
00605 case 6: multi_merge_f<6>(target, length);
00606 break;
00607 case 7: multi_merge_f<7>(target, length);
00608 break;
00609 case 8: multi_merge_f<8>(target, length);
00610 break;
00611 case 9: multi_merge_f<9>(target, length);
00612 break;
00613 case 10: multi_merge_f<10>(target, length);
00614 break;
00615 #endif
00616 default:
00617 #if STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_INTERNAL
00618 {
00619 std::vector<std::pair<Element *, Element *> > seqs;
00620 std::vector<int_type> orig_seq_index;
00621 for (unsigned int i = 0; i < k; ++i)
00622 {
00623 if (current[i] != current_end[i] && !is_sentinel(*current[i]))
00624 {
00625 seqs.push_back(std::make_pair(current[i], current_end[i]));
00626 orig_seq_index.push_back(i);
00627 }
00628 }
00629
00630 parallel::multiway_merge_sentinel(seqs.begin(), seqs.end(), target, inv_cmp, length);
00631
00632 for (unsigned int i = 0; i < seqs.size(); ++i)
00633 {
00634 int_type seg = orig_seq_index[i];
00635 current[seg] = seqs[i].first;
00636 }
00637
00638 for (unsigned int i = 0; i < k; ++i)
00639 if (is_segment_empty(i))
00640 {
00641 STXXL_VERBOSE3("deallocated " << i);
00642 deallocate_segment(i);
00643 }
00644 }
00645 #else
00646 multi_merge_k(target, length);
00647 #endif
00648 break;
00649 }
00650
00651
00652 size_ -= length;
00653
00654
00655 {
00656 const unsigned_type num_segments_used = k - free_slots.size();
00657 const unsigned_type num_segments_trigger = k - (3 * k / 5);
00658
00659
00660
00661
00662
00663 STXXL_VERBOSE3("loser_tree compact? k=" << k << " #used=" << num_segments_used
00664 << " <= #trigger=" << num_segments_trigger << " ==> "
00665 << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ")
00666 << " || "
00667 << ((k == 4 && !free_slots.empty() && !is_segment_empty(3)) ? "yes" : "no ")
00668 << " #free=" << free_slots.size());
00669 if (k > 1 && ((num_segments_used <= num_segments_trigger) ||
00670 (k == 4 && !free_slots.empty() && !is_segment_empty(3))))
00671 {
00672 compactTree();
00673 }
00674 }
00675
00676 }
00677
00678
00679
00680 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00681 inline bool loser_tree<ValTp_, Cmp_, KNKMAX>::is_segment_empty(unsigned_type slot)
00682 {
00683 return (is_sentinel(*(current[slot])) && (current[slot] != &sentinel));
00684 }
00685
00686 #if STXXL_PQ_INTERNAL_LOSER_TREE
00687
00688 template <class ValTp_, class Cmp_, unsigned KNKMAX>
00689 void loser_tree<ValTp_, Cmp_, KNKMAX>::
00690 multi_merge_k(Element * target, unsigned_type length)
00691 {
00692 Entry * currentPos;
00693 Element currentKey;
00694 unsigned_type currentIndex;
00695 unsigned_type kReg = k;
00696 Element * done = target + length;
00697 unsigned_type winnerIndex = entry[0].index;
00698 Element winnerKey = entry[0].key;
00699 Element * winnerPos;
00700
00701 while (target != done)
00702 {
00703 winnerPos = current[winnerIndex];
00704
00705
00706 *target = winnerKey;
00707
00708
00709 ++winnerPos;
00710 current[winnerIndex] = winnerPos;
00711 winnerKey = *winnerPos;
00712
00713
00714 if (is_sentinel(winnerKey))
00715 deallocate_segment(winnerIndex);
00716
00717
00718
00719 for (unsigned_type i = (winnerIndex + kReg) >> 1; i > 0; i >>= 1) {
00720 currentPos = entry + i;
00721 currentKey = currentPos->key;
00722 if (cmp(winnerKey, currentKey)) {
00723 currentIndex = currentPos->index;
00724 currentPos->key = winnerKey;
00725 currentPos->index = winnerIndex;
00726 winnerKey = currentKey;
00727 winnerIndex = currentIndex;
00728 }
00729 }
00730
00731 ++target;
00732 }
00733 entry[0].index = winnerIndex;
00734 entry[0].key = winnerKey;
00735 }
00736 #endif //STXXL_PQ_INTERNAL_LOSER_TREE
00737 }
00738
00740
00741 __STXXL_END_NAMESPACE
00742
00743 #endif // !STXXL_PQ_LOSERTREE_HEADER
00744