• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files
  • Examples
  • File List

random_shuffle.h

00001 /***************************************************************************
00002  *  include/stxxl/bits/algo/random_shuffle.h
00003  *
00004  *  Part of the STXXL. See http://stxxl.sourceforge.net
00005  *
00006  *  Copyright (C) 2007 Manuel Krings
00007  *  Copyright (C) 2007 Markus Westphal
00008  *  Copyright (C) 2009 Andreas Beckmann <[email protected]>
00009  *
00010  *  Distributed under the Boost Software License, Version 1.0.
00011  *  (See accompanying file LICENSE_1_0.txt or copy at
00012  *  http://www.boost.org/LICENSE_1_0.txt)
00013  **************************************************************************/
00014 
00015 #ifndef STXXL_RANDOM_SHUFFLE_HEADER
00016 #define STXXL_RANDOM_SHUFFLE_HEADER
00017 
00018 // TODO: improve main memory consumption in recursion
00019 //        (free stacks buffers)
00020 // TODO: shuffle small input in internal memory
00021 
00022 
00023 #include <stxxl/bits/stream/stream.h>
00024 #include <stxxl/scan>
00025 #include <stxxl/stack>
00026 
00027 
00028 __STXXL_BEGIN_NAMESPACE
00029 
00030 
00033 
00034 
00044 template <typename ExtIterator_,
00045           typename RandomNumberGenerator_,
00046           unsigned BlockSize_,
00047           unsigned PageSize_,
00048           typename AllocStrategy_>
00049 void random_shuffle(ExtIterator_ first,
00050                     ExtIterator_ last,
00051                     RandomNumberGenerator_ & rand,
00052                     unsigned_type M,
00053                     AllocStrategy_ AS = STXXL_DEFAULT_ALLOC_STRATEGY())
00054 {
00055     typedef typename ExtIterator_::value_type value_type;
00056     typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
00057                                             stxxl::grow_shrink2, PageSize_,
00058                                             BlockSize_, void, 0, AllocStrategy_>::result stack_type;
00059     typedef typename stack_type::block_type block_type;
00060 
00061     STXXL_VERBOSE1("random_shuffle: Plain Version");
00062     STXXL_STATIC_ASSERT(int(BlockSize_) < 0 && "This implementation was never tested. Please report to the stxxl developers if you have an ExtIterator_ that works with this implementation.");
00063 
00064     stxxl::int64 n = last - first; // the number of input elements
00065 
00066     // make sure we have at least 6 blocks + 1 page
00067     if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) {
00068         STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
00069         M = 6 * BlockSize_ + PageSize_ * BlockSize_;
00070         STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
00071     }
00072 
00073     int_type k = M / (3 * BlockSize_); // number of buckets
00074 
00075 
00076     stxxl::int64 i, j, size = 0;
00077 
00078     value_type * temp_array;
00079     typedef typename stxxl::VECTOR_GENERATOR<value_type,
00080                                              PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
00081     temp_vector_type * temp_vector;
00082 
00083     STXXL_VERBOSE1("random_shuffle: " << M / BlockSize_ - k << " write buffers for " << k << " buckets");
00084     stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k);  // no read buffers and M/B-k write buffers
00085 
00086     stack_type ** buckets;
00087 
00088     // create and put buckets into container
00089     buckets = new stack_type *[k];
00090     for (j = 0; j < k; j++)
00091         buckets[j] = new stack_type(pool, 0);
00092 
00093 
00095     typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream;
00096     input_stream in = stxxl::stream::streamify(first, last);
00097 
00098     // distribute input into random buckets
00099     int_type random_bucket = 0;
00100     for (i = 0; i < n; ++i) {
00101         random_bucket = rand(k);
00102         buckets[random_bucket]->push(*in); // reading the current input element
00103         ++in;                              // go to the next input element
00104     }
00105 
00107     // resize buffers
00108     pool.resize_write(0);
00109     pool.resize_prefetch(PageSize_);
00110 
00111     unsigned_type space_left = M - k * BlockSize_ -
00112                                PageSize_ * BlockSize_; // remaining int space
00113     ExtIterator_ Writer = first;
00114     ExtIterator_ it = first;
00115 
00116     for (i = 0; i < k; i++) {
00117         STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
00118     }
00119 
00120     // shuffle each bucket
00121     for (i = 0; i < k; i++) {
00122         buckets[i]->set_prefetch_aggr(PageSize_);
00123         size = buckets[i]->size();
00124 
00125         // does the bucket fit into memory?
00126         if (size * sizeof(value_type) < space_left) {
00127             STXXL_VERBOSE1("random_shuffle: no recursion");
00128 
00129             // copy bucket into temp. array
00130             temp_array = new value_type[size];
00131             for (j = 0; j < size; j++) {
00132                 temp_array[j] = buckets[i]->top();
00133                 buckets[i]->pop();
00134             }
00135 
00136             // shuffle
00137             std::random_shuffle(temp_array, temp_array + size, rand);
00138 
00139             // write back
00140             for (j = 0; j < size; j++) {
00141                 *Writer = temp_array[j];
00142                 ++Writer;
00143             }
00144 
00145             // free memory
00146             delete[] temp_array;
00147         }
00148         else {
00149             STXXL_VERBOSE1("random_shuffle: recursion");
00150 
00151             // copy bucket into temp. stxxl::vector
00152             temp_vector = new temp_vector_type(size);
00153 
00154             for (j = 0; j < size; j++) {
00155                 (*temp_vector)[j] = buckets[i]->top();
00156                 buckets[i]->pop();
00157             }
00158 
00159             pool.resize_prefetch(0);
00160             space_left += PageSize_ * BlockSize_;
00161             STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
00162 
00163             // recursive shuffle
00164             stxxl::random_shuffle(temp_vector->begin(),
00165                                   temp_vector->end(), rand, space_left);
00166 
00167             pool.resize_prefetch(PageSize_);
00168 
00169             // write back
00170             for (j = 0; j < size; j++) {
00171                 *Writer = (*temp_vector)[j];
00172                 ++Writer;
00173             }
00174 
00175             // free memory
00176             delete temp_vector;
00177         }
00178 
00179         // free bucket
00180         delete buckets[i];
00181         space_left += BlockSize_;
00182     }
00183 
00184     delete[] buckets;
00185 }
00186 
00192 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
00193           unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_>
00194 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
00195                     stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> last,
00196                     RandomNumberGenerator_ & rand,
00197                     unsigned_type M)
00198 {
00199     typedef stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> ExtIterator_;
00200     typedef typename ExtIterator_::value_type value_type;
00201     typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
00202                                             stxxl::grow_shrink2, PageSize_, BlockSize_>::result stack_type;
00203     typedef typename stack_type::block_type block_type;
00204 
00205     STXXL_VERBOSE1("random_shuffle: Vector Version");
00206 
00207     // make sure we have at least 6 blocks + 1 page
00208     if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) {
00209         STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
00210         M = 6 * BlockSize_ + PageSize_ * BlockSize_;
00211         STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
00212     }
00213 
00214     stxxl::int64 n = last - first;     // the number of input elements
00215     int_type k = M / (3 * BlockSize_); // number of buckets
00216 
00217     stxxl::int64 i, j, size = 0;
00218 
00219     value_type * temp_array;
00220     typedef typename stxxl::VECTOR_GENERATOR<value_type,
00221                                              PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
00222     temp_vector_type * temp_vector;
00223 
00224     stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k);  // no read buffers and M/B-k write buffers
00225 
00226     stack_type ** buckets;
00227 
00228     // create and put buckets into container
00229     buckets = new stack_type *[k];
00230     for (j = 0; j < k; j++)
00231         buckets[j] = new stack_type(pool, 0);
00232 
00233 
00234     typedef buf_istream<block_type, typename ExtIterator_::bids_container_iterator> buf_istream_type;
00235     typedef buf_ostream<block_type, typename ExtIterator_::bids_container_iterator> buf_ostream_type;
00236 
00237     first.flush();     // flush container
00238 
00239     // create prefetching stream,
00240     buf_istream_type in(first.bid(), last.bid() + ((last.block_offset()) ? 1 : 0), 2);
00241     // create buffered write stream for blocks
00242     buf_ostream_type out(first.bid(), 2);
00243 
00244     ExtIterator_ _cur = first - first.block_offset();
00245 
00246     // leave part of the block before _begin untouched (e.g. copy)
00247     for ( ; _cur != first; ++_cur)
00248     {
00249         typename ExtIterator_::value_type tmp;
00250         in >> tmp;
00251         out << tmp;
00252     }
00253 
00255 
00256     // distribute input into random buckets
00257     int_type random_bucket = 0;
00258     for (i = 0; i < n; ++i, ++_cur) {
00259         random_bucket = rand(k);
00260         typename ExtIterator_::value_type tmp;
00261         in >> tmp;
00262         buckets[random_bucket]->push(tmp); // reading the current input element
00263     }
00264 
00266     // resize buffers
00267     pool.resize_write(0);
00268     pool.resize_prefetch(PageSize_);
00269 
00270     unsigned_type space_left = M - k * BlockSize_ -
00271                                PageSize_ * BlockSize_; // remaining int space
00272 
00273     for (i = 0; i < k; i++) {
00274         STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
00275     }
00276 
00277     // shuffle each bucket
00278     for (i = 0; i < k; i++) {
00279         buckets[i]->set_prefetch_aggr(PageSize_);
00280         size = buckets[i]->size();
00281 
00282         // does the bucket fit into memory?
00283         if (size * sizeof(value_type) < space_left) {
00284             STXXL_VERBOSE1("random_shuffle: no recursion");
00285 
00286             // copy bucket into temp. array
00287             temp_array = new value_type[size];
00288             for (j = 0; j < size; j++) {
00289                 temp_array[j] = buckets[i]->top();
00290                 buckets[i]->pop();
00291             }
00292 
00293             // shuffle
00294             std::random_shuffle(temp_array, temp_array + size, rand);
00295 
00296             // write back
00297             for (j = 0; j < size; j++) {
00298                 typename ExtIterator_::value_type tmp;
00299                 tmp = temp_array[j];
00300                 out << tmp;
00301             }
00302 
00303             // free memory
00304             delete[] temp_array;
00305         } else {
00306             STXXL_VERBOSE1("random_shuffle: recursion");
00307             // copy bucket into temp. stxxl::vector
00308             temp_vector = new temp_vector_type(size);
00309 
00310             for (j = 0; j < size; j++) {
00311                 (*temp_vector)[j] = buckets[i]->top();
00312                 buckets[i]->pop();
00313             }
00314 
00315             pool.resize_prefetch(0);
00316             space_left += PageSize_ * BlockSize_;
00317 
00318             STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
00319 
00320             // recursive shuffle
00321             stxxl::random_shuffle(temp_vector->begin(),
00322                                   temp_vector->end(), rand, space_left);
00323 
00324             pool.resize_prefetch(PageSize_);
00325 
00326             // write back
00327             for (j = 0; j < size; j++) {
00328                 typename ExtIterator_::value_type tmp;
00329                 tmp = (*temp_vector)[j];
00330                 out << tmp;
00331             }
00332 
00333             // free memory
00334             delete temp_vector;
00335         }
00336 
00337         // free bucket
00338         delete buckets[i];
00339         space_left += BlockSize_;
00340     }
00341 
00342     delete[] buckets;
00343 
00344     // leave part of the block after _end untouched
00345     if (last.block_offset())
00346     {
00347         ExtIterator_ _last_block_end = last + (block_type::size - last.block_offset());
00348         for ( ; _cur != _last_block_end; ++_cur)
00349         {
00350             typename ExtIterator_::value_type tmp;
00351             in >> tmp;
00352             out << tmp;
00353         }
00354     }
00355 }
00356 
00361 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
00362           unsigned BlockSize_, typename PgTp_, unsigned PageSize_>
00363 inline
00364 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
00365                     stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> last,
00366                     unsigned_type M)
00367 {
00368     stxxl::random_number<> rand;
00369     stxxl::random_shuffle(first, last, rand, M);
00370 }
00371 
00373 
00374 __STXXL_END_NAMESPACE
00375 
00376 #endif // !STXXL_RANDOM_SHUFFLE_HEADER
00377 // vim: et:ts=4:sw=4

Generated by  doxygen 1.7.1