STXXL  1.4.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
random_shuffle.h
Go to the documentation of this file.
1 /***************************************************************************
2  * include/stxxl/bits/algo/random_shuffle.h
3  *
4  * Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  * Copyright (C) 2007 Manuel Krings
7  * Copyright (C) 2007 Markus Westphal <[email protected]>
8  * Copyright (C) 2009, 2010 Andreas Beckmann <[email protected]>
9  *
10  * Distributed under the Boost Software License, Version 1.0.
11  * (See accompanying file LICENSE_1_0.txt or copy at
12  * http://www.boost.org/LICENSE_1_0.txt)
13  **************************************************************************/
14 
15 #ifndef STXXL_ALGO_RANDOM_SHUFFLE_HEADER
16 #define STXXL_ALGO_RANDOM_SHUFFLE_HEADER
17 
18 // TODO: improve main memory consumption in recursion
19 // (free stacks buffers)
20 // TODO: shuffle small input in internal memory
21 
23 #include <stxxl/scan>
24 #include <stxxl/stack>
25 
27 
28 //! \addtogroup stlalgo
29 //! \{
30 
31 //! External equivalent of std::random_shuffle
32 //! \param first begin of the range to shuffle
33 //! \param last end of the range to shuffle
34 //! \param rand random number generator object (functor)
35 //! \param M number of bytes for internal use
36 //! \param AS parallel disk allocation strategy
37 //!
38 //! - BlockSize size of the block to use for external memory data structures
39 //! - PageSize page size in blocks to use for external memory data structures
40 template <typename ExtIterator,
41  typename RandomNumberGenerator,
42  unsigned BlockSize,
43  unsigned PageSize,
44  typename AllocStrategy>
45 void random_shuffle(ExtIterator first,
46  ExtIterator last,
47  RandomNumberGenerator& rand,
48  unsigned_type M,
49  AllocStrategy AS = STXXL_DEFAULT_ALLOC_STRATEGY())
50 {
51  STXXL_UNUSED(AS); // FIXME: Why is this not being used?
52  typedef typename ExtIterator::value_type value_type;
53  typedef typename STACK_GENERATOR<
54  value_type, external, grow_shrink2, PageSize,
55  BlockSize, void, 0, AllocStrategy
56  >::result stack_type;
57  typedef typename stack_type::block_type block_type;
58 
59  STXXL_VERBOSE1("random_shuffle: Plain Version");
60  STXXL_STATIC_ASSERT(int(BlockSize) < 0 && "This implementation was never tested. Please report to the stxxl developers if you have an ExtIterator that works with this implementation.");
61 
62  int64 n = last - first; // the number of input elements
63 
64  // make sure we have at least 6 blocks + 1 page
65  if (M < 6 * BlockSize + PageSize * BlockSize) {
66  STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
67  M = 6 * BlockSize + PageSize * BlockSize;
68  STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
69  }
70 
71  int_type k = M / (3 * BlockSize); // number of buckets
72 
73  int64 i, j, size = 0;
74 
75  value_type* temp_array;
76  typedef typename VECTOR_GENERATOR<
77  value_type, PageSize, 4, BlockSize, AllocStrategy
78  >::result temp_vector_type;
79  temp_vector_type* temp_vector;
80 
81  STXXL_VERBOSE1("random_shuffle: " << M / BlockSize - k << " write buffers for " << k << " buckets");
82  read_write_pool<block_type> pool(0, M / BlockSize - k); // no read buffers and M/B-k write buffers
83 
84  stack_type** buckets;
85 
86  // create and put buckets into container
87  buckets = new stack_type*[k];
88  for (j = 0; j < k; j++)
89  buckets[j] = new stack_type(pool, 0);
90 
91  ///// Reading input /////////////////////
92  typedef typename stream::streamify_traits<ExtIterator>::stream_type input_stream;
93  input_stream in = stream::streamify(first, last);
94 
95  // distribute input into random buckets
96  int_type random_bucket = 0;
97  for (i = 0; i < n; ++i) {
98  random_bucket = rand(k);
99  buckets[random_bucket]->push(*in); // reading the current input element
100  ++in; // go to the next input element
101  }
102 
103  ///// Processing //////////////////////
104  // resize buffers
105  pool.resize_write(0);
106  pool.resize_prefetch(PageSize);
107 
108  unsigned_type space_left = M - k * BlockSize -
109  PageSize * BlockSize; // remaining int space
110  ExtIterator Writer = first;
111  ExtIterator it = first;
112 
113  for (i = 0; i < k; i++) {
114  STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
115  }
116 
117  // shuffle each bucket
118  for (i = 0; i < k; i++) {
119  buckets[i]->set_prefetch_aggr(PageSize);
120  size = buckets[i]->size();
121 
122  // does the bucket fit into memory?
123  if (size * sizeof(value_type) < space_left) {
124  STXXL_VERBOSE1("random_shuffle: no recursion");
125 
126  // copy bucket into temp. array
127  temp_array = new value_type[size];
128  for (j = 0; j < size; j++) {
129  temp_array[j] = buckets[i]->top();
130  buckets[i]->pop();
131  }
132 
133  // shuffle
135  random_shuffle(temp_array, temp_array + size, rand);
136 
137  // write back
138  for (j = 0; j < size; j++) {
139  *Writer = temp_array[j];
140  ++Writer;
141  }
142 
143  // free memory
144  delete[] temp_array;
145  }
146  else {
147  STXXL_VERBOSE1("random_shuffle: recursion");
148 
149  // copy bucket into temp. stxxl::vector
150  temp_vector = new temp_vector_type(size);
151 
152  for (j = 0; j < size; j++) {
153  (*temp_vector)[j] = buckets[i]->top();
154  buckets[i]->pop();
155  }
156 
157  pool.resize_prefetch(0);
158  space_left += PageSize * BlockSize;
159  STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
160 
161  // recursive shuffle
162  stxxl::random_shuffle(temp_vector->begin(),
163  temp_vector->end(), rand, space_left);
164 
165  pool.resize_prefetch(PageSize);
166 
167  // write back
168  for (j = 0; j < size; j++) {
169  *Writer = (*temp_vector)[j];
170  ++Writer;
171  }
172 
173  // free memory
174  delete temp_vector;
175  }
176 
177  // free bucket
178  delete buckets[i];
179  space_left += BlockSize;
180  }
181 
182  delete[] buckets;
183 }
184 
185 //! External equivalent of std::random_shuffle (specialization for stxxl::vector)
186 //! \param first begin of the range to shuffle
187 //! \param last end of the range to shuffle
188 //! \param rand random number generator object (functor)
189 //! \param M number of bytes for internal use
190 template <typename Type, typename AllocStrategy, typename SizeType, typename DiffType,
191  unsigned BlockSize, typename PageType, unsigned PageSize, typename RandomNumberGenerator>
193  stxxl::vector_iterator<Type, AllocStrategy, SizeType, DiffType,
194  BlockSize, PageType, PageSize> first,
195  stxxl::vector_iterator<Type, AllocStrategy, SizeType, DiffType,
196  BlockSize, PageType, PageSize> last,
197  RandomNumberGenerator& rand,
198  unsigned_type M)
199 {
201  typedef typename ExtIterator::value_type value_type;
202  typedef typename ExtIterator::bids_container_iterator bids_container_iterator;
203  typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
204  stxxl::grow_shrink2, PageSize, BlockSize>::result stack_type;
205  typedef typename stack_type::block_type block_type;
206 
207  STXXL_VERBOSE1("random_shuffle: Vector Version");
208 
209  // make sure we have at least 6 blocks + 1 page
210  if (M < 6 * BlockSize + PageSize * BlockSize) {
211  STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
212  M = 6 * BlockSize + PageSize * BlockSize;
213  STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
214  }
215 
216  stxxl::int64 n = last - first; // the number of input elements
217  int_type k = M / (3 * BlockSize); // number of buckets
218 
219  stxxl::int64 i, j, size = 0;
220 
221  value_type* temp_array;
222  typedef typename stxxl::VECTOR_GENERATOR<
223  value_type, PageSize, 4, BlockSize, AllocStrategy
224  >::result temp_vector_type;
225  temp_vector_type* temp_vector;
226 
227  // no read buffers and M/B-k write buffers
228  stxxl::read_write_pool<block_type> pool(0, M / BlockSize - k);
229 
230  stack_type** buckets;
231 
232  // create and put buckets into container
233  buckets = new stack_type*[k];
234  for (j = 0; j < k; j++)
235  buckets[j] = new stack_type(pool, 0);
236 
237  typedef buf_istream<block_type, bids_container_iterator> buf_istream_type;
238  typedef buf_ostream<block_type, bids_container_iterator> buf_ostream_type;
239 
240  first.flush(); // flush container
241 
242  // create prefetching stream,
243  buf_istream_type in(first.bid(), last.bid() + ((last.block_offset()) ? 1 : 0), 2);
244  // create buffered write stream for blocks
245  buf_ostream_type out(first.bid(), 2);
246 
247  ExtIterator _cur = first - first.block_offset();
248 
249  // leave part of the block before _begin untouched (e.g. copy)
250  for ( ; _cur != first; ++_cur)
251  {
252  typename ExtIterator::value_type tmp;
253  in >> tmp;
254  out << tmp;
255  }
256 
257  ///// Reading input /////////////////////
258 
259  // distribute input into random buckets
260  int_type random_bucket = 0;
261  for (i = 0; i < n; ++i, ++_cur) {
262  random_bucket = rand((unsigned)k);
263  typename ExtIterator::value_type tmp;
264  in >> tmp;
265  buckets[random_bucket]->push(tmp); // reading the current input element
266  }
267 
268  ///// Processing //////////////////////
269  // resize buffers
270  pool.resize_write(0);
271  pool.resize_prefetch(PageSize);
272 
273  // remaining int space
274  unsigned_type space_left = M - k * BlockSize - PageSize * BlockSize;
275 
276  for (i = 0; i < k; i++) {
277  STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
278  }
279 
280  // shuffle each bucket
281  for (i = 0; i < k; i++) {
282  buckets[i]->set_prefetch_aggr(PageSize);
283  size = buckets[i]->size();
284 
285  // does the bucket fit into memory?
286  if (size * sizeof(value_type) < space_left) {
287  STXXL_VERBOSE1("random_shuffle: no recursion");
288 
289  // copy bucket into temp. array
290  temp_array = new value_type[(size_t)size];
291  for (j = 0; j < size; j++) {
292  temp_array[j] = buckets[i]->top();
293  buckets[i]->pop();
294  }
295 
296  // shuffle
298  random_shuffle(temp_array, temp_array + size, rand);
299 
300  // write back
301  for (j = 0; j < size; j++) {
302  typename ExtIterator::value_type tmp;
303  tmp = temp_array[j];
304  out << tmp;
305  }
306 
307  // free memory
308  delete[] temp_array;
309  } else {
310  STXXL_VERBOSE1("random_shuffle: recursion");
311  // copy bucket into temp. stxxl::vector
312  temp_vector = new temp_vector_type(size);
313 
314  for (j = 0; j < size; j++) {
315  (*temp_vector)[j] = buckets[i]->top();
316  buckets[i]->pop();
317  }
318 
319  pool.resize_prefetch(0);
320  space_left += PageSize * BlockSize;
321 
322  STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
323 
324  // recursive shuffle
325  stxxl::random_shuffle(temp_vector->begin(),
326  temp_vector->end(), rand, space_left);
327 
328  pool.resize_prefetch(PageSize);
329 
330  // write back
331  for (j = 0; j < size; j++) {
332  typename ExtIterator::value_type tmp;
333  tmp = (*temp_vector)[j];
334  out << tmp;
335  }
336 
337  // free memory
338  delete temp_vector;
339  }
340 
341  // free bucket
342  delete buckets[i];
343  space_left += BlockSize;
344  }
345 
346  delete[] buckets;
347 
348  // leave part of the block after _end untouched
349  if (last.block_offset())
350  {
351  ExtIterator last_block_end = last + (block_type::size - last.block_offset());
352  for ( ; _cur != last_block_end; ++_cur)
353  {
354  typename ExtIterator::value_type tmp;
355  in >> tmp;
356  out << tmp;
357  }
358  }
359 }
360 
361 //! External equivalent of std::random_shuffle (specialization for stxxl::vector)
362 //! \param first begin of the range to shuffle
363 //! \param last end of the range to shuffle
364 //! \param M number of bytes for internal use
365 template <typename Type, typename AllocStrategy, typename SizeType, typename DiffType,
366  unsigned BlockSize, typename PageType, unsigned PageSize>
367 inline
369  stxxl::vector_iterator<Type, AllocStrategy, SizeType, DiffType,
370  BlockSize, PageType, PageSize> first,
371  stxxl::vector_iterator<Type, AllocStrategy, SizeType, DiffType,
372  BlockSize, PageType, PageSize> last,
373  unsigned_type M)
374 {
376  stxxl::random_shuffle(first, last, rand, M);
377 }
378 
379 //! \}
380 
382 
383 #endif // !STXXL_ALGO_RANDOM_SHUFFLE_HEADER
384 // vim: et:ts=4:sw=4
iterator2stream< InputIterator > streamify(InputIterator begin, InputIterator end)
Input iterator range to stream converter.
Definition: stream.h:93
Uniform [0, N) pseudo-random generator.
Definition: rand.h:247
long long int int64
Definition: types.h:38
A model of stream that retrieves the data from an input iterator. For convenience use streamify funct...
Definition: stream.h:46
#define STXXL_DEFAULT_ALLOC_STRATEGY
Definition: block_alloc.h:258
Buffered output stream.
Definition: buf_ostream.h:29
self_type & flush()
Force flush of current block, for finishing writing within a block.
Definition: buf_ostream.h:109
Implements dynamically resizable buffered writing and prefetched reading pool.
choose_int_types< my_pointer_size >::int_type int_type
Definition: types.h:63
void random_shuffle(ExtIterator first, ExtIterator last, RandomNumberGenerator &rand, unsigned_type M, AllocStrategy AS=STXXL_DEFAULT_ALLOC_STRATEGY())
External equivalent of std::random_shuffle.
void resize_prefetch(size_type new_size)
Resizes size of the pool.
#define STXXL_BEGIN_NAMESPACE
Definition: namespace.h:16
void STXXL_UNUSED(const U &)
Definition: unused.h:22
Buffered input stream.
Definition: buf_istream.h:34
#define STXXL_VERBOSE1(x)
Definition: verbose.h:99
External vector iterator, model of ext_random_access_iterator concept.
Definition: vector.h:275
#define STXXL_STATIC_ASSERT(x)
Definition: utils.h:48
#define STXXL_ERRMSG(x)
Definition: verbose.h:80
choose_int_types< my_pointer_size >::unsigned_type unsigned_type
Definition: types.h:64
Stack type generator Introduction to stack container: see STXXL Stack tutorial. Design and Internals of stack container: see Stack.
Definition: stack.h:1011
void resize_write(size_type new_size)
Resizes size of the pool.
External vector type generator.
Definition: vector.h:2602
#define STXXL_END_NAMESPACE
Definition: namespace.h:17