STXXL  1.4.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
random_shuffle.h
Go to the documentation of this file.
1 /***************************************************************************
2  * include/stxxl/bits/algo/random_shuffle.h
3  *
4  * Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  * Copyright (C) 2007 Manuel Krings
7  * Copyright (C) 2007 Markus Westphal <[email protected]>
8  * Copyright (C) 2009, 2010 Andreas Beckmann <[email protected]>
9  *
10  * Distributed under the Boost Software License, Version 1.0.
11  * (See accompanying file LICENSE_1_0.txt or copy at
12  * http://www.boost.org/LICENSE_1_0.txt)
13  **************************************************************************/
14 
15 #ifndef STXXL_ALGO_RANDOM_SHUFFLE_HEADER
16 #define STXXL_ALGO_RANDOM_SHUFFLE_HEADER
17 
18 // TODO: improve main memory consumption in recursion
19 // (free stacks buffers)
20 // TODO: shuffle small input in internal memory
21 
22 
24 #include <stxxl/scan>
25 #include <stxxl/stack>
26 
27 
29 
30 
31 //! \addtogroup stlalgo
32 //! \{
33 
34 
35 //! External equivalent of std::random_shuffle
36 //! \param first begin of the range to shuffle
37 //! \param last end of the range to shuffle
38 //! \param rand random number generator object (functor)
39 //! \param M number of bytes for internal use
40 //! \param AS parallel disk allocation strategy
41 //!
42 //! - BlockSize_ size of the block to use for external memory data structures
43 //! - PageSize_ page size in blocks to use for external memory data structures
44 template <typename ExtIterator_,
45  typename RandomNumberGenerator_,
46  unsigned BlockSize_,
47  unsigned PageSize_,
48  typename AllocStrategy_>
49 void random_shuffle(ExtIterator_ first,
50  ExtIterator_ last,
51  RandomNumberGenerator_& rand,
52  unsigned_type M,
53  AllocStrategy_ AS = STXXL_DEFAULT_ALLOC_STRATEGY())
54 {
55  STXXL_UNUSED(AS); // FIXME: Why is this not being used?
56  typedef typename ExtIterator_::value_type value_type;
57  typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
58  stxxl::grow_shrink2, PageSize_,
59  BlockSize_, void, 0, AllocStrategy_>::result stack_type;
60  typedef typename stack_type::block_type block_type;
61 
62  STXXL_VERBOSE1("random_shuffle: Plain Version");
63  STXXL_STATIC_ASSERT(int(BlockSize_) < 0 && "This implementation was never tested. Please report to the stxxl developers if you have an ExtIterator_ that works with this implementation.");
64 
65  stxxl::int64 n = last - first; // the number of input elements
66 
67  // make sure we have at least 6 blocks + 1 page
68  if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) {
69  STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
70  M = 6 * BlockSize_ + PageSize_ * BlockSize_;
71  STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
72  }
73 
74  int_type k = M / (3 * BlockSize_); // number of buckets
75 
76 
77  stxxl::int64 i, j, size = 0;
78 
79  value_type* temp_array;
80  typedef typename stxxl::VECTOR_GENERATOR<value_type,
81  PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
82  temp_vector_type* temp_vector;
83 
84  STXXL_VERBOSE1("random_shuffle: " << M / BlockSize_ - k << " write buffers for " << k << " buckets");
85  stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k); // no read buffers and M/B-k write buffers
86 
87  stack_type** buckets;
88 
89  // create and put buckets into container
90  buckets = new stack_type*[k];
91  for (j = 0; j < k; j++)
92  buckets[j] = new stack_type(pool, 0);
93 
94 
95  ///// Reading input /////////////////////
96  typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream;
97  input_stream in = stxxl::stream::streamify(first, last);
98 
99  // distribute input into random buckets
100  int_type random_bucket = 0;
101  for (i = 0; i < n; ++i) {
102  random_bucket = rand(k);
103  buckets[random_bucket]->push(*in); // reading the current input element
104  ++in; // go to the next input element
105  }
106 
107  ///// Processing //////////////////////
108  // resize buffers
109  pool.resize_write(0);
110  pool.resize_prefetch(PageSize_);
111 
112  unsigned_type space_left = M - k * BlockSize_ -
113  PageSize_ * BlockSize_; // remaining int space
114  ExtIterator_ Writer = first;
115  ExtIterator_ it = first;
116 
117  for (i = 0; i < k; i++) {
118  STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
119  }
120 
121  // shuffle each bucket
122  for (i = 0; i < k; i++) {
123  buckets[i]->set_prefetch_aggr(PageSize_);
124  size = buckets[i]->size();
125 
126  // does the bucket fit into memory?
127  if (size * sizeof(value_type) < space_left) {
128  STXXL_VERBOSE1("random_shuffle: no recursion");
129 
130  // copy bucket into temp. array
131  temp_array = new value_type[size];
132  for (j = 0; j < size; j++) {
133  temp_array[j] = buckets[i]->top();
134  buckets[i]->pop();
135  }
136 
137  // shuffle
139  random_shuffle(temp_array, temp_array + size, rand);
140 
141  // write back
142  for (j = 0; j < size; j++) {
143  *Writer = temp_array[j];
144  ++Writer;
145  }
146 
147  // free memory
148  delete[] temp_array;
149  }
150  else {
151  STXXL_VERBOSE1("random_shuffle: recursion");
152 
153  // copy bucket into temp. stxxl::vector
154  temp_vector = new temp_vector_type(size);
155 
156  for (j = 0; j < size; j++) {
157  (*temp_vector)[j] = buckets[i]->top();
158  buckets[i]->pop();
159  }
160 
161  pool.resize_prefetch(0);
162  space_left += PageSize_ * BlockSize_;
163  STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
164 
165  // recursive shuffle
166  stxxl::random_shuffle(temp_vector->begin(),
167  temp_vector->end(), rand, space_left);
168 
169  pool.resize_prefetch(PageSize_);
170 
171  // write back
172  for (j = 0; j < size; j++) {
173  *Writer = (*temp_vector)[j];
174  ++Writer;
175  }
176 
177  // free memory
178  delete temp_vector;
179  }
180 
181  // free bucket
182  delete buckets[i];
183  space_left += BlockSize_;
184  }
185 
186  delete[] buckets;
187 }
188 
189 //! External equivalent of std::random_shuffle (specialization for stxxl::vector)
190 //! \param first begin of the range to shuffle
191 //! \param last end of the range to shuffle
192 //! \param rand random number generator object (functor)
193 //! \param M number of bytes for internal use
194 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
195  unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_>
198  RandomNumberGenerator_& rand,
199  unsigned_type M)
200 {
202  typedef typename ExtIterator_::value_type value_type;
203  typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
204  stxxl::grow_shrink2, PageSize_, BlockSize_>::result stack_type;
205  typedef typename stack_type::block_type block_type;
206 
207  STXXL_VERBOSE1("random_shuffle: Vector Version");
208 
209  // make sure we have at least 6 blocks + 1 page
210  if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) {
211  STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
212  M = 6 * BlockSize_ + PageSize_ * BlockSize_;
213  STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
214  }
215 
216  stxxl::int64 n = last - first; // the number of input elements
217  int_type k = M / (3 * BlockSize_); // number of buckets
218 
219  stxxl::int64 i, j, size = 0;
220 
221  value_type* temp_array;
222  typedef typename stxxl::VECTOR_GENERATOR<value_type,
223  PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
224  temp_vector_type* temp_vector;
225 
226  stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k); // no read buffers and M/B-k write buffers
227 
228  stack_type** buckets;
229 
230  // create and put buckets into container
231  buckets = new stack_type*[k];
232  for (j = 0; j < k; j++)
233  buckets[j] = new stack_type(pool, 0);
234 
235 
238 
239  first.flush(); // flush container
240 
241  // create prefetching stream,
242  buf_istream_type in(first.bid(), last.bid() + ((last.block_offset()) ? 1 : 0), 2);
243  // create buffered write stream for blocks
244  buf_ostream_type out(first.bid(), 2);
245 
246  ExtIterator_ _cur = first - first.block_offset();
247 
248  // leave part of the block before _begin untouched (e.g. copy)
249  for ( ; _cur != first; ++_cur)
250  {
251  typename ExtIterator_::value_type tmp;
252  in >> tmp;
253  out << tmp;
254  }
255 
256  ///// Reading input /////////////////////
257 
258  // distribute input into random buckets
259  int_type random_bucket = 0;
260  for (i = 0; i < n; ++i, ++_cur) {
261  random_bucket = rand((unsigned)k);
262  typename ExtIterator_::value_type tmp;
263  in >> tmp;
264  buckets[random_bucket]->push(tmp); // reading the current input element
265  }
266 
267  ///// Processing //////////////////////
268  // resize buffers
269  pool.resize_write(0);
270  pool.resize_prefetch(PageSize_);
271 
272  unsigned_type space_left = M - k * BlockSize_ -
273  PageSize_ * BlockSize_; // remaining int space
274 
275  for (i = 0; i < k; i++) {
276  STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
277  }
278 
279  // shuffle each bucket
280  for (i = 0; i < k; i++) {
281  buckets[i]->set_prefetch_aggr(PageSize_);
282  size = buckets[i]->size();
283 
284  // does the bucket fit into memory?
285  if (size * sizeof(value_type) < space_left) {
286  STXXL_VERBOSE1("random_shuffle: no recursion");
287 
288  // copy bucket into temp. array
289  temp_array = new value_type[size];
290  for (j = 0; j < size; j++) {
291  temp_array[j] = buckets[i]->top();
292  buckets[i]->pop();
293  }
294 
295  // shuffle
297  random_shuffle(temp_array, temp_array + size, rand);
298 
299  // write back
300  for (j = 0; j < size; j++) {
301  typename ExtIterator_::value_type tmp;
302  tmp = temp_array[j];
303  out << tmp;
304  }
305 
306  // free memory
307  delete[] temp_array;
308  } else {
309  STXXL_VERBOSE1("random_shuffle: recursion");
310  // copy bucket into temp. stxxl::vector
311  temp_vector = new temp_vector_type(size);
312 
313  for (j = 0; j < size; j++) {
314  (*temp_vector)[j] = buckets[i]->top();
315  buckets[i]->pop();
316  }
317 
318  pool.resize_prefetch(0);
319  space_left += PageSize_ * BlockSize_;
320 
321  STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
322 
323  // recursive shuffle
324  stxxl::random_shuffle(temp_vector->begin(),
325  temp_vector->end(), rand, space_left);
326 
327  pool.resize_prefetch(PageSize_);
328 
329  // write back
330  for (j = 0; j < size; j++) {
331  typename ExtIterator_::value_type tmp;
332  tmp = (*temp_vector)[j];
333  out << tmp;
334  }
335 
336  // free memory
337  delete temp_vector;
338  }
339 
340  // free bucket
341  delete buckets[i];
342  space_left += BlockSize_;
343  }
344 
345  delete[] buckets;
346 
347  // leave part of the block after _end untouched
348  if (last.block_offset())
349  {
350  ExtIterator_ _last_block_end = last + (block_type::size - last.block_offset());
351  for ( ; _cur != _last_block_end; ++_cur)
352  {
353  typename ExtIterator_::value_type tmp;
354  in >> tmp;
355  out << tmp;
356  }
357  }
358 }
359 
360 //! External equivalent of std::random_shuffle (specialization for stxxl::vector)
361 //! \param first begin of the range to shuffle
362 //! \param last end of the range to shuffle
363 //! \param M number of bytes for internal use
364 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
365  unsigned BlockSize_, typename PgTp_, unsigned PageSize_>
366 inline
369  unsigned_type M)
370 {
372  stxxl::random_shuffle(first, last, rand, M);
373 }
374 
375 //! \}
376 
378 
379 #endif // !STXXL_ALGO_RANDOM_SHUFFLE_HEADER
380 // vim: et:ts=4:sw=4
Uniform [0, N) pseudo-random generator.
Definition: rand.h:245
void random_shuffle(ExtIterator_ first, ExtIterator_ last, RandomNumberGenerator_ &rand, unsigned_type M, AllocStrategy_ AS=STXXL_DEFAULT_ALLOC_STRATEGY())
External equivalent of std::random_shuffle.
long long int int64
Definition: types.h:40
A model of stream that retrieves the data from an input iterator. For convenience use streamify funct...
Definition: stream.h:71
_Self & flush()
Force flush of current block, for finishing writing within a block.
Definition: buf_ostream.h:111
#define STXXL_DEFAULT_ALLOC_STRATEGY
Definition: block_alloc.h:259
Buffered output stream.
Definition: buf_ostream.h:31
Implements dynamically resizable buffered writing and prefetched reading pool.
choose_int_types< my_pointer_size >::int_type int_type
Definition: types.h:66
void resize_prefetch(size_type new_size)
Resizes size of the pool.
#define STXXL_BEGIN_NAMESPACE
Definition: namespace.h:16
void STXXL_UNUSED(const U &)
Definition: unused.h:23
Buffered input stream.
Definition: buf_istream.h:37
#define STXXL_VERBOSE1(x)
Definition: verbose.h:99
External vector iterator, model of ext_random_access_iterator concept.
Definition: vector.h:275
#define STXXL_STATIC_ASSERT(x)
Definition: utils.h:49
bids_container_iterator bid() const
return iterator to BID containg current element
Definition: vector.h:344
#define STXXL_ERRMSG(x)
Definition: verbose.h:79
choose_int_types< my_pointer_size >::unsigned_type unsigned_type
Definition: types.h:67
block_offset_type block_offset() const
return block offset of current element
Definition: vector.h:339
Stack type generator Introduction to stack container: see STXXL Stack tutorial. Design and Internals of stack container: see Stack.
Definition: stack.h:1021
iterator2stream< InputIterator_ > streamify(InputIterator_ begin, InputIterator_ end)
Input iterator range to stream converter.
Definition: stream.h:116
void resize_write(size_type new_size)
Resizes size of the pool.
External vector type generator.
Definition: vector.h:2580
#define STXXL_END_NAMESPACE
Definition: namespace.h:17