• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files
  • Examples
  • File List

random_shuffle.h

00001 /***************************************************************************
00002  *  include/stxxl/bits/algo/random_shuffle.h
00003  *
00004  *  Part of the STXXL. See http://stxxl.sourceforge.net
00005  *
00006  *  Copyright (C) 2007 Manuel Krings
00007  *  Copyright (C) 2007 Markus Westphal
00008  *
00009  *  Distributed under the Boost Software License, Version 1.0.
00010  *  (See accompanying file LICENSE_1_0.txt or copy at
00011  *  http://www.boost.org/LICENSE_1_0.txt)
00012  **************************************************************************/
00013 
00014 #ifndef STXXL_RANDOM_SHUFFLE_HEADER
00015 #define STXXL_RANDOM_SHUFFLE_HEADER
00016 
00017 // TODO: improve main memory consumption in recursion
00018 //        (free stacks buffers)
00019 
00020 
00021 #include <stxxl/bits/stream/stream.h>
00022 #include <stxxl/scan>
00023 #include <stxxl/stack>
00024 
00025 
00026 __STXXL_BEGIN_NAMESPACE
00027 
00028 namespace random_shuffle_local
00029 {
00030     // efficiently writes data into an stxxl::vector with overlapping of I/O and
00031     // computation
00032     template <class ExtIterator>
00033     class write_vector
00034     {
00035         typedef typename ExtIterator::size_type size_type;
00036         typedef typename ExtIterator::value_type value_type;
00037         typedef typename ExtIterator::block_type block_type;
00038         typedef typename ExtIterator::const_iterator ConstExtIterator;
00039         typedef stxxl::buf_ostream<block_type, typename ExtIterator::bids_container_iterator> buf_ostream_type;
00040 
00041         ExtIterator it;
00042         unsigned_type nbuffers;
00043         buf_ostream_type * outstream;
00044 
00045     public:
00046         write_vector(ExtIterator begin,
00047                      unsigned nbuffers_ = 2  // buffers to use for overlapping (>=2 recommended)
00048                      ) : it(begin), nbuffers(nbuffers_)
00049         {
00050             outstream = new buf_ostream_type(it.bid(), nbuffers);
00051         }
00052 
00053         value_type & operator * ()
00054         {
00055             if (it.block_offset() == 0)
00056                 it.touch();
00057             // tells the vector that the block was modified
00058             return **outstream;
00059         }
00060 
00061         write_vector & operator ++ ()
00062         {
00063             ++it;
00064             ++(*outstream);
00065             return *this;
00066         }
00067 
00068         void flush()
00069         {
00070             ConstExtIterator const_out = it;
00071 
00072             while (const_out.block_offset())
00073             {
00074                 **outstream = *const_out; // might cause I/Os for loading the page that
00075                 ++const_out;              // contains data beyond out
00076                 ++(*outstream);
00077             }
00078 
00079             it.flush();
00080             delete outstream;
00081             outstream = NULL;
00082         }
00083 
00084         virtual ~write_vector()
00085         {
00086             if (outstream)
00087                 flush();
00088         }
00089     };
00090 }
00091 
00092 
00095 
00096 
00097 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
00098           unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_>
00099 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
00100                     stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> beyond,
00101                     RandomNumberGenerator_ & rand,
00102                     unsigned_type M);
00103 
00104 
00114 template <typename ExtIterator_,
00115           typename RandomNumberGenerator_,
00116           unsigned BlockSize_,
00117           unsigned PageSize_,
00118           typename AllocStrategy_>
00119 void random_shuffle(ExtIterator_ first,
00120                     ExtIterator_ beyond,
00121                     RandomNumberGenerator_ & rand,
00122                     unsigned_type M,
00123                     AllocStrategy_ AS = STXXL_DEFAULT_ALLOC_STRATEGY())
00124 {
00125     typedef typename ExtIterator_::value_type value_type;
00126     typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
00127                                             stxxl::grow_shrink2, PageSize_,
00128                                             BlockSize_, void, 0, AllocStrategy_>::result stack_type;
00129     typedef typename stack_type::block_type block_type;
00130 
00131     STXXL_VERBOSE1("random_shuffle: Plain Version");
00132 
00133     stxxl::int64 n = beyond - first; // the number of input elements
00134 
00135     // make sure we have at least 6 blocks + 1 page
00136     if (M < 6 * BlockSize_ + PageSize_ * BlockSize_)
00137         M = 6 * BlockSize_ + PageSize_ * BlockSize_;
00138 
00139 
00140     int_type k = M / (3 * BlockSize_); // number of buckets
00141 
00142 
00143     stxxl::int64 i, j, size = 0;
00144 
00145     value_type * temp_array;
00146     typedef typename stxxl::VECTOR_GENERATOR<value_type,
00147                                              PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
00148     temp_vector_type * temp_vector;
00149 
00150     stxxl::prefetch_pool<block_type> p_pool(0);               // no read buffers
00151     STXXL_VERBOSE1("random_shuffle: " << M / BlockSize_ - k << " write buffers for " << k << " buckets");
00152     stxxl::write_pool<block_type> w_pool(M / BlockSize_ - k); // M/B-k write buffers
00153 
00154     stack_type ** buckets;
00155 
00156     // create and put buckets into container
00157     buckets = new stack_type *[k];
00158     for (j = 0; j < k; j++)
00159         buckets[j] = new stack_type(p_pool, w_pool, 0);
00160 
00161 
00163     typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream;
00164     input_stream in = stxxl::stream::streamify(first, beyond);
00165 
00166     // distribute input into random buckets
00167     int_type random_bucket = 0;
00168     for (i = 0; i < n; ++i) {
00169         random_bucket = rand(k);
00170         buckets[random_bucket]->push(*in); // reading the current input element
00171         ++in;                              // go to the next input element
00172     }
00173 
00175     // resize buffers
00176     w_pool.resize(0);
00177     p_pool.resize(PageSize_);
00178 
00179     // Set prefetch aggr to PageSize_
00180     for (int_type j = 0; j < k; j++) {
00181         buckets[j]->set_prefetch_aggr(PageSize_);
00182     }
00183 
00184     unsigned_type space_left = M - k * BlockSize_ -
00185                                PageSize_ * BlockSize_; // remaining int space
00186     ExtIterator_ Writer = first;
00187     ExtIterator_ it = first;
00188 
00189     for (i = 0; i < k; i++) {
00190         STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
00191     }
00192 
00193     // shuffle each bucket
00194     for (i = 0; i < k; i++) {
00195         size = buckets[i]->size();
00196 
00197         // does the bucket fit into memory?
00198         if (size * sizeof(value_type) < space_left) {
00199             STXXL_VERBOSE1("random_shuffle: no recursion");
00200 
00201             // copy bucket into temp. array
00202             temp_array = new value_type[size];
00203             for (j = 0; j < size; j++) {
00204                 temp_array[j] = buckets[i]->top();
00205                 buckets[i]->pop();
00206             }
00207 
00208             // shuffle
00209             std::random_shuffle(temp_array, temp_array + size, rand);
00210 
00211             // write back
00212             for (j = 0; j < size; j++) {
00213                 *Writer = temp_array[j];
00214                 ++Writer;
00215             }
00216 
00217             // free memory
00218             delete[] temp_array;
00219         }
00220         else {
00221             STXXL_VERBOSE1("random_shuffle: recursion");
00222 
00223             // copy bucket into temp. stxxl::vector
00224             temp_vector = new temp_vector_type(size);
00225 
00226             for (j = 0; j < size; j++) {
00227                 (*temp_vector)[j] = buckets[i]->top();
00228                 buckets[i]->pop();
00229             }
00230 
00231             p_pool.resize(0);
00232             space_left += PageSize_ * BlockSize_;
00233             STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
00234 
00235             // recursive shuffle
00236             stxxl::random_shuffle(temp_vector->begin(),
00237                                   temp_vector->end(), rand, space_left);
00238 
00239             p_pool.resize(PageSize_);
00240 
00241             // write back
00242             for (j = 0; j < size; j++) {
00243                 *Writer = (*temp_vector)[j];
00244                 ++Writer;
00245             }
00246 
00247             // free memory
00248             delete temp_vector;
00249         }
00250     }
00251 
00252     // free buckets
00253     for (int j = 0; j < k; j++)
00254         delete buckets[j];
00255 
00256     delete[] buckets;
00257 }
00258 
00264 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
00265           unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_>
00266 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
00267                     stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> beyond,
00268                     RandomNumberGenerator_ & rand,
00269                     unsigned_type M)
00270 {
00271     typedef stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> ExtIterator_;
00272     typedef typename ExtIterator_::value_type value_type;
00273     typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
00274                                             stxxl::grow_shrink2, PageSize_, BlockSize_>::result stack_type;
00275     typedef typename stack_type::block_type block_type;
00276 
00277     STXXL_VERBOSE1("random_shuffle: Vector Version");
00278 
00279     // make sure we have at least 6 blocks + 1 page
00280     if (M < 6 * BlockSize_ + PageSize_ * BlockSize_)
00281         M = 6 * BlockSize_ + PageSize_ * BlockSize_;
00282 
00283 
00284     stxxl::int64 n = beyond - first;   // the number of input elements
00285     int_type k = M / (3 * BlockSize_); // number of buckets
00286 
00287     stxxl::int64 i, j, size = 0;
00288 
00289     value_type * temp_array;
00290     typedef typename stxxl::VECTOR_GENERATOR<value_type,
00291                                              PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
00292     temp_vector_type * temp_vector;
00293 
00294     stxxl::prefetch_pool<block_type> p_pool(0);               // no read buffers
00295     stxxl::write_pool<block_type> w_pool(M / BlockSize_ - k); // M/B-k write buffers
00296 
00297     stack_type ** buckets;
00298 
00299     // create and put buckets into container
00300     buckets = new stack_type *[k];
00301     for (j = 0; j < k; j++)
00302         buckets[j] = new stack_type(p_pool, w_pool, 0);
00303 
00304 
00306     typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream;
00307     input_stream in = stxxl::stream::streamify(first, beyond);
00308 
00309     // distribute input into random buckets
00310     int_type random_bucket = 0;
00311     for (i = 0; i < n; ++i) {
00312         random_bucket = rand(k);
00313         buckets[random_bucket]->push(*in); // reading the current input element
00314         ++in;                              // go to the next input element
00315     }
00316 
00318     // resize buffers
00319     w_pool.resize(0);
00320     p_pool.resize(PageSize_);
00321 
00322     // Set prefetch aggr to PageSize_
00323     for (j = 0; j < k; j++) {
00324         buckets[j]->set_prefetch_aggr(PageSize_);
00325     }
00326 
00327     unsigned_type space_left = M - k * BlockSize_ -
00328                                PageSize_ * BlockSize_; // remaining int space
00329     random_shuffle_local::write_vector<ExtIterator_>
00330     Writer(first, 2 * PageSize_);
00331     ExtIterator_ it = first;
00332 
00333     for (i = 0; i < k; i++) {
00334         STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
00335     }
00336 
00337     // shuffle each bucket
00338     for (i = 0; i < k; i++) {
00339         size = buckets[i]->size();
00340 
00341         // does the bucket fit into memory?
00342         if (size * sizeof(value_type) < space_left) {
00343             STXXL_VERBOSE1("random_shuffle: no recursion");
00344 
00345             // copy bucket into temp. array
00346             temp_array = new value_type[size];
00347             for (j = 0; j < size; j++) {
00348                 temp_array[j] = buckets[i]->top();
00349                 buckets[i]->pop();
00350             }
00351 
00352             // shuffle
00353             std::random_shuffle(temp_array, temp_array + size, rand);
00354 
00355             // write back
00356             for (j = 0; j < size; j++) {
00357                 *Writer = temp_array[j];
00358                 ++Writer;
00359             }
00360 
00361             // free memory
00362             delete[] temp_array;
00363         }
00364         else {
00365             STXXL_VERBOSE1("random_shuffle: recursion");
00366             // copy bucket into temp. stxxl::vector
00367             temp_vector = new temp_vector_type(size);
00368 
00369             for (j = 0; j < size; j++) {
00370                 (*temp_vector)[j] = buckets[i]->top();
00371                 buckets[i]->pop();
00372             }
00373 
00374             p_pool.resize(0);
00375             space_left += PageSize_ * BlockSize_;
00376 
00377             STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
00378 
00379             // recursive shuffle
00380             stxxl::random_shuffle(temp_vector->begin(),
00381                                   temp_vector->end(), rand, space_left);
00382 
00383             p_pool.resize(PageSize_);
00384 
00385             // write back
00386             for (j = 0; j < size; j++) {
00387                 *Writer = (*temp_vector)[j];
00388                 ++Writer;
00389             }
00390 
00391             // free memory
00392             delete temp_vector;
00393         }
00394     }
00395 
00396     // free buckets
00397     for (int j = 0; j < k; j++)
00398         delete buckets[j];
00399 
00400     delete[] buckets;
00401 
00402     Writer.flush(); // flush buffers
00403 }
00404 
00409 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
00410           unsigned BlockSize_, typename PgTp_, unsigned PageSize_>
00411 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
00412                     stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> beyond,
00413                     unsigned_type M)
00414 {
00415     stxxl::random_number<> rand;
00416     stxxl::random_shuffle(first, beyond, rand, M);
00417 }
00418 
00420 
00421 __STXXL_END_NAMESPACE
00422 
00423 #endif // !STXXL_RANDOM_SHUFFLE_HEADER

Generated by  doxygen 1.7.1