STXXL  1.4-dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
util.h
Go to the documentation of this file.
1 /***************************************************************************
2  * include/stxxl/bits/containers/hash_map/util.h
3  *
4  * Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  * Copyright (C) 2007 Markus Westphal <[email protected]>
7  * Copyright (C) 2014 Timo Bingmann <[email protected]>
8  *
9  * Distributed under the Boost Software License, Version 1.0.
10  * (See accompanying file LICENSE_1_0.txt or copy at
11  * http://www.boost.org/LICENSE_1_0.txt)
12  **************************************************************************/
13 
14 #ifndef STXXL_CONTAINERS_HASH_MAP_UTIL_HEADER
15 #define STXXL_CONTAINERS_HASH_MAP_UTIL_HEADER
16 #define STXXL_CONTAINERS_HASHMAP__UTIL_H
17 
20 
23 
25 
26 namespace hash_map {
27 
28 // For internal memory chaining: struct to compose next-pointer and delete-flag
29 // share the same memory: the lowest bit is occupied by the del-flag.
30 template <class ValueType>
31 struct node
32 {
34  ValueType value_;
35 
36  //! check if the next node is deleted.
37  bool deleted()
38  {
39  return ((int_type)next_and_del_ & 0x01) == 1;
40  }
41  //! change deleted flag on the next node
42  bool set_deleted(bool d)
43  {
44  next_and_del_ = (node<ValueType>*)(((int_type)next_and_del_ & ~0x01) | (int_type)d);
45  return d;
46  }
47 
48  //! return the next node, without the "next" flag.
50  {
51  return (node<ValueType>*)((int_type)next_and_del_ & ~0x01);
52  }
53  //! change the "next" value of next node pointer
55  {
56  next_and_del_ = (node<ValueType>*)(((int_type)next_and_del_ & 0x01) | (int_type)n);
57  return n;
58  }
59 };
60 
61 template <class NodeType>
62 struct bucket
63 {
64  //! entry point to the chain in internal memory
65  NodeType* list_;
66 
67  //! number of elements in external memory
69 
70  //! index of first block's bid (to be used as index for hash_map's
71  //! bids_-array
73 
74  //! index of first subblock
76 
78  : list_(NULL),
79  n_external_(0),
80  i_block_(0),
81  i_subblock_(0)
82  { }
83 
84  bucket(NodeType* list, external_size_type n_external,
85  internal_size_type i_block, internal_size_type i_subblock)
86  : list_(list),
87  n_external_(n_external),
88  i_block_(i_block),
89  i_subblock_(i_subblock)
90  { }
91 };
92 
93 //! Used to scan external memory with prefetching.
94 template <class CacheType, class BidIterator>
96 {
97 public:
98  typedef CacheType cache_type;
99  typedef BidIterator bid_iterator;
100 
101  typedef typename cache_type::block_type block_type;
102  typedef typename block_type::value_type subblock_type;
103  typedef typename subblock_type::value_type value_type;
104 
105  typedef typename bid_iterator::value_type bid_type;
106 
107  enum { block_size = block_type::size, subblock_size = subblock_type::size };
108 
109 private:
110  //! index within current block
112  //! points to the beginning of the block-sequence
114  //! points to the current block
116  //! points to the end of the block-sequence
118  //! points to the next block to prefetch
120 
121  //! shared block-cache
123 
124  //! true if prefetching enabled
125  bool prefetch_;
126  //! pages, which are read at once from disk, consist of this many blocks
128  //! number of pages to prefetch
130 
131  //! current block dirty ?
132  bool dirty_;
133  //! current subblock
135 
136 public:
137  //! Create a new buffered reader to read the blocks in [seq_begin, seq_end)
138  //! \param seq_begin First block's bid
139  //! \param seq_end Last block's bid
140  //! \param cache Block-cache used for prefetching
141  //! \param i_subblock Start reading from this subblock
142  //! \param prefetch Enable/Disable prefetching
144  cache_type& cache,
145  internal_size_type i_subblock = 0, bool prefetch = true)
146  : i_value_(0),
147  begin_bid_(seq_begin),
148  curr_bid_(seq_begin),
149  end_bid_(seq_end),
150  cache_(cache),
151  prefetch_(false),
152  page_size_(tuning::get_instance()->prefetch_page_size),
153  prefetch_pages_(tuning::get_instance()->prefetch_pages),
154  dirty_(false),
155  subblock_(NULL)
156  {
157  if (seq_begin == seq_end)
158  return;
159 
160  if (prefetch)
161  enable_prefetching();
162 
163  // will (amongst other things) set subblock_ and retain current block
164  skip_to(seq_begin, i_subblock);
165  }
166 
168  {
169  if (curr_bid_ != end_bid_)
170  cache_.release_block(*curr_bid_);
171  }
172 
174  {
175  if (prefetch_)
176  return;
177 
178  prefetch_ = true;
179  pref_bid_ = curr_bid_;
180  // start prefetching page_size*prefetch_pages blocks beginning with current one
181  for (unsigned_type i = 0; i < page_size_ * prefetch_pages_; i++)
182  {
183  if (pref_bid_ == end_bid_)
184  break;
185 
186  cache_.prefetch_block(*pref_bid_);
187  ++pref_bid_;
188  }
189  }
190 
191  //! Get const-reference to current value.
193  {
194  return (*subblock_)[i_value_ % subblock_size];
195  }
196 
197  //! Get reference to current value. The current value's block's dirty flag
198  //! will be set.
200  {
201  if (!dirty_) {
202  cache_.make_dirty(*curr_bid_);
203  dirty_ = true;
204  }
205 
206  return (*subblock_)[i_value_ % subblock_size];
207  }
208 
209  //! Advance to the next value
210  //! \return false if last value has been reached, otherwise true.
212  {
213  if (curr_bid_ == end_bid_)
214  return false;
215 
216  // same block
217  if (i_value_ + 1 < block_size * subblock_size)
218  {
219  i_value_++;
220  }
221  // entered new block
222  else
223  {
224  cache_.release_block(*curr_bid_);
225 
226  i_value_ = 0;
227  dirty_ = false;
228  ++curr_bid_;
229 
230  if (curr_bid_ == end_bid_)
231  return false;
232 
233  cache_.retain_block(*curr_bid_);
234 
235  // if a complete page has been consumed, prefetch the next one
236  if (prefetch_ && (curr_bid_ - begin_bid_) % page_size_ == 0)
237  {
238  for (unsigned i = 0; i < page_size_; i++)
239  {
240  if (pref_bid_ == end_bid_)
241  break;
242  cache_.prefetch_block(*pref_bid_);
243  ++pref_bid_;
244  }
245  }
246  }
247 
248  // entered new subblock
249  if (i_value_ % subblock_size == 0)
250  {
251  subblock_ = cache_.get_subblock(*curr_bid_, i_value_ / subblock_size);
252  }
253 
254  return true;
255  }
256 
257  //! Skip remaining values of the current subblock.
259  {
260  i_value_ = (i_value_ / subblock_size + 1) * subblock_size - 1;
261  operator ++ (); // takes care of prefetching etc
262  }
263 
264  //! Continue reading at given block and subblock.
266  {
267  if (curr_bid_ == end_bid_)
268  return;
269 
270  if (bid != curr_bid_)
271  dirty_ = false;
272 
273  cache_.release_block(*curr_bid_);
274 
275  if (bid == end_bid_)
276  return;
277 
278  // skip to block
279  while (curr_bid_ != bid) {
280  ++curr_bid_;
281 
282  if (prefetch_ && (curr_bid_ - begin_bid_) % page_size_ == 0)
283  {
284  for (unsigned i = 0; i < page_size_; i++)
285  {
286  if (pref_bid_ == end_bid_)
287  break;
288  cache_.prefetch_block(*pref_bid_);
289  ++pref_bid_;
290  }
291  }
292  }
293  // skip to subblock
294  i_value_ = i_subblock * subblock_size;
295  subblock_ = cache_.get_subblock(*curr_bid_, i_subblock);
296  cache_.retain_block(*curr_bid_);
297  }
298 };
299 
300 //! Buffered writing of values. New Blocks are allocated as needed.
301 template <class BlockType, class BidContainer>
303 {
304 public:
305  typedef BlockType block_type;
306  typedef BidContainer bid_container_type;
307 
308  typedef typename block_type::value_type subblock_type;
309  typedef typename subblock_type::value_type value_type;
310 
312 
313  enum {
314  block_size = block_type::size,
315  subblock_size = subblock_type::size
316  };
317 
318 private:
319  //! buffered writer
321  //! current buffer-block
323 
324  //! sequence of allocated blocks (to be expanded as needed)
326 
327  //! current block's index
329  //! current value's index in the range of [0..\#values per block[
331  //! number of blocks to allocate in a row
333 
334 public:
335  //! Create a new buffered writer.
336  //! \param c write values to these blocks (c holds the bids)
337  //! \param buffer_size Number of write-buffers to use
338  //! \param batch_size bulk buffered writing
340  int_type buffer_size, int_type batch_size)
341  : writer_(buffer_size, batch_size),
342  bids_(c),
343  i_block_(0),
344  i_value_(0),
345  increase_(config::get_instance()->disks_number() * 3)
346  {
347  block_ = writer_.get_free_block();
348  }
349 
351  {
352  flush();
353  }
354 
355  //! Write all values from given stream.
356  template <class StreamType>
357  void append_from_stream(StreamType& stream)
358  {
359  while (!stream.empty())
360  {
361  append(*stream);
362  ++stream;
363  }
364  }
365 
366  //! Write given value.
367  void append(const value_type& value)
368  {
369  internal_size_type i_subblock = (i_value_ / subblock_size);
370  (*block_)[i_subblock][i_value_ % subblock_size] = value;
371 
372  if (i_value_ + 1 < block_size * subblock_size)
373  i_value_++;
374  // reached end of a block
375  else
376  {
377  i_value_ = 0;
378 
379  // allocate new blocks if neccessary ...
380  if (i_block_ == bids_->size())
381  {
382  bids_->resize(bids_->size() + increase_);
384  bm->new_blocks(striping(), bids_->end() - increase_, bids_->end());
385  }
386  // ... and write current block
387  block_ = writer_.write(block_, (*bids_)[i_block_]);
388 
389  i_block_++;
390  }
391  }
392 
393  //! Continue writing at the beginning of the next subblock. TODO more
394  //! efficient
396  {
397  i_value_ = (i_value_ / subblock_size + 1) * subblock_size - 1;
398  append(value_type()); // writing and allocating blocks etc
399  }
400 
401  //! Flushes not yet written blocks.
402  void flush()
403  {
404  i_value_ = 0;
405  if (i_block_ == bids_->size())
406  {
407  bids_->resize(bids_->size() + increase_);
409  bm->new_blocks(striping(), bids_->end() - increase_, bids_->end());
410  }
411  block_ = writer_.write(block_, (*bids_)[i_block_]);
412  i_block_++;
413 
414  writer_.flush();
415  }
416 
417  //! Index of current block.
418  internal_size_type i_block() { return i_block_; }
419 
420  //! Index of current subblock.
421  internal_size_type i_subblock() { return i_value_ / subblock_size; }
422 };
423 
424 /*!
425  * Additional information about a stored value:
426  * - the bucket in which it can be found
427  * - where it is currently stored (intern or extern)
428  * - the buffer-node
429  * - the position in external memory
430  */
431 template <class HashMap>
433 {
434  typedef HashMap hash_map_type;
435  typedef typename hash_map_type::value_type value_type;
436  typedef typename hash_map_type::source_type source_type;
437  typedef typename hash_map_type::node_type node_type;
438 
441 
447 
449  : i_bucket_(internal_size_type(-1))
450  { }
451 
452  HashedValue(const value_type& value, internal_size_type i_bucket,
453  source_type src, node_type* node, external_size_type i_external)
454  : value_(value),
455  i_bucket_(i_bucket),
456  source_(src),
457  node_(node),
458  i_external_(i_external)
459  { }
460 };
461 
462 /*!
463  * Stream interface for all value-pairs currently stored in the map. Returned
464  * values are HashedValue-objects (actual value enriched with information on
465  * where the value can be found (bucket-number, internal, external)). Values,
466  * marked as deleted in internal-memory, are not returned; for modified values
467  * only the one in internal memory is returned.
468 */
469 template <class HashMap, class Reader>
471 {
472  typedef HashMap hash_map_type;
474 
475  typedef typename hash_map_type::node_type node_type;
476  typedef typename hash_map_type::bid_container_type::iterator bid_iterator;
477  typedef typename hash_map_type::buckets_container_type::iterator bucket_iterator;
478 
481 
483  Reader& reader_;
491 
493  Reader& reader, bid_iterator begin_bid,
495  : map_(map),
496  reader_(reader),
497  curr_bucket_(begin_bucket),
498  end_bucket_(end_bucket),
499  begin_bid_(begin_bid),
500  i_bucket_(0),
501  node_(curr_bucket_ != end_bucket_ ? curr_bucket_->list_ : NULL),
502  i_external_(0)
503  {
504  if (!empty())
505  value_ = find_next();
506  }
507 
508  const value_type& operator * () { return value_; }
509 
510  bool empty() const { return curr_bucket_ == end_bucket_; }
511 
513  {
514  if (value_.source_ == hash_map_type::src_internal)
515  node_ = node_->next();
516  else
517  {
518  ++reader_;
519  ++i_external_;
520  }
521  value_ = find_next();
522  }
523 
525  {
526  while (true)
527  {
528  // internal and external elements available
529  while (node_ && i_external_ < curr_bucket_->n_external_)
530  {
531  if (map_._leq(node_->value_.first, reader_.const_value().first))
532  {
533  if (map_._eq(node_->value_.first, reader_.const_value().first))
534  {
535  ++reader_;
536  ++i_external_;
537  }
538 
539  if (!node_->deleted())
540  return value_type(node_->value_, i_bucket_, hash_map_type::src_internal, node_, i_external_);
541  else
542  node_ = node_->next();
543  }
544  else
545  return value_type(reader_.const_value(), i_bucket_, hash_map_type::src_external, node_, i_external_);
546  }
547  // only internal elements left
548  while (node_)
549  {
550  if (!node_->deleted())
551  return value_type(node_->value_, i_bucket_, hash_map_type::src_internal, node_, i_external_);
552  else
553  node_ = node_->next();
554  }
555  // only external elements left
556  while (i_external_ < curr_bucket_->n_external_)
557  return value_type(reader_.const_value(), i_bucket_, hash_map_type::src_external, node_, i_external_);
558 
559  // if we made it to this point there are obviously no more values in the current bucket
560  // let's try the next one (outer while-loop!)
561  ++curr_bucket_;
562  ++i_bucket_;
563  if (curr_bucket_ == end_bucket_)
564  return value_type();
565 
566  node_ = curr_bucket_->list_;
567  i_external_ = 0;
568  reader_.skip_to(begin_bid_ + curr_bucket_->i_block_, curr_bucket_->i_subblock_);
569  }
570  }
571 };
572 
573 } // namespace hash_map
574 
576 
577 #endif // !STXXL_CONTAINERS_HASH_MAP_UTIL_HEADER
BidContainer bid_container_type
Definition: util.h:306
hash_map_type::internal_size_type internal_size_type
Definition: util.h:479
const value_type & const_value()
Get const-reference to current value.
Definition: util.h:192
void append_from_stream(StreamType &stream)
Write all values from given stream.
Definition: util.h:357
hash_map_type::buckets_container_type::iterator bucket_iterator
Definition: util.h:477
Additional information about a stored value:
Definition: util.h:432
HashedValue(const value_type &value, internal_size_type i_bucket, source_type src, node_type *node, external_size_type i_external)
Definition: util.h:452
bucket_iterator curr_bucket_
Definition: util.h:484
hash_map_type::node_type node_type
Definition: util.h:475
bid_container_type * bids_
sequence of allocated blocks (to be expanded as needed)
Definition: util.h:325
ValueType value_
Definition: util.h:34
hash_map_type::internal_size_type internal_size_type
Definition: util.h:439
void append(const value_type &value)
Write given value.
Definition: util.h:367
Block manager class.
Definition: block_manager.h:61
bid_iterator::value_type bid_type
Definition: util.h:105
cache_type::block_type block_type
Definition: util.h:101
external_size_type i_external_
Definition: util.h:489
hash_map_type::external_size_type external_size_type
Definition: util.h:440
bool deleted()
check if the next node is deleted.
Definition: util.h:37
bool dirty_
current block dirty ?
Definition: util.h:132
unsigned_type page_size_
pages, which are read at once from disk, consist of this many blocks
Definition: util.h:127
hash_map_type::external_size_type external_size_type
Definition: util.h:480
internal_size_type i_subblock()
Index of current subblock.
Definition: util.h:421
unsigned_type i_block_
current block&#39;s index
Definition: util.h:328
buffered_writer(bid_container_type *c, int_type buffer_size, int_type batch_size)
Create a new buffered writer.
Definition: util.h:339
hash_map_type::bid_container_type::iterator bid_iterator
Definition: util.h:476
bid_iterator end_bid_
points to the end of the block-sequence
Definition: util.h:117
block_type::value_type subblock_type
Definition: util.h:308
internal_size_type i_bucket_
Definition: util.h:487
uint_pair & operator++()
prefix increment operator (directly manipulates the integer parts)
Definition: uint_types.h:163
external_size_type i_external_
Definition: util.h:446
cache_type & cache_
shared block-cache
Definition: util.h:122
HashedValuesStream(bucket_iterator begin_bucket, bucket_iterator end_bucket, Reader &reader, bid_iterator begin_bid, hash_map_type &map)
Definition: util.h:492
subblock_type * subblock_
current subblock
Definition: util.h:134
internal_size_type i_subblock_
index of first subblock
Definition: util.h:75
subblock_type::value_type value_type
Definition: util.h:103
Tuning parameters for external memory hash map.
Definition: tuning.h:25
void finish_subblock()
Continue writing at the beginning of the next subblock. TODO more efficient.
Definition: util.h:395
bool prefetch_
true if prefetching enabled
Definition: util.h:125
void next_subblock()
Skip remaining values of the current subblock.
Definition: util.h:258
choose_int_types< my_pointer_size >::int_type int_type
Definition: types.h:63
block_type::value_type subblock_type
Definition: util.h:102
static instance_pointer get_instance()
Definition: singleton.h:38
unsigned_type internal_size_type
Definition: types.h:66
buffered_reader(bid_iterator seq_begin, bid_iterator seq_end, cache_type &cache, internal_size_type i_subblock=0, bool prefetch=true)
Create a new buffered reader to read the blocks in [seq_begin, seq_end)
Definition: util.h:143
node< ValueType > * next_and_del_
Definition: util.h:33
Striping disk allocation scheme functor.
Definition: block_alloc.h:41
bucket_iterator end_bucket_
Definition: util.h:485
internal_size_type i_block_
index of first block&#39;s bid (to be used as index for hash_map&#39;s bids_-array
Definition: util.h:72
#define STXXL_BEGIN_NAMESPACE
Definition: namespace.h:16
external_size_type n_external_
number of elements in external memory
Definition: util.h:68
hash_map_type::node_type node_type
Definition: util.h:437
stxxl::buffered_writer< block_type > writer_type
Definition: util.h:311
internal_size_type i_bucket_
Definition: util.h:443
unsigned_type increase_
number of blocks to allocate in a row
Definition: util.h:332
Stream interface for all value-pairs currently stored in the map.
Definition: util.h:470
bool set_deleted(bool d)
change deleted flag on the next node
Definition: util.h:42
NodeType * list_
entry point to the chain in internal memory
Definition: util.h:65
hash_map_type::value_type value_type
Definition: util.h:435
void new_blocks(const DiskAssignFunctor &functor, BIDIteratorClass bidbegin, BIDIteratorClass bidend, unsigned_type offset=0)
Allocates new blocks.
uint64 external_size_type
Definition: types.h:67
choose_int_types< my_pointer_size >::unsigned_type unsigned_type
Definition: types.h:64
Used to scan external memory with prefetching.
Definition: util.h:95
Access point to disks properties. Since 1.4.0: no config files are read automatically! ...
Definition: config.h:112
block_type * block_
current buffer-block
Definition: util.h:322
subblock_type::value_type value_type
Definition: util.h:309
writer_type writer_
buffered writer
Definition: util.h:320
unsigned_type i_value_
index within current block
Definition: util.h:111
hash_map_type::source_type source_type
Definition: util.h:436
External associative container (map). Introduction to map container: see STXXL Map (B+-tree) tutor...
Definition: map.h:82
bid_iterator begin_bid_
points to the beginning of the block-sequence
Definition: util.h:113
internal_size_type i_block()
Index of current block.
Definition: util.h:418
node< ValueType > * set_next(node< ValueType > *n)
change the &quot;next&quot; value of next node pointer
Definition: util.h:54
HashedValue< HashMap > value_type
Definition: util.h:473
Buffered writing of values. New Blocks are allocated as needed.
Definition: util.h:302
void flush()
Flushes not yet written blocks.
Definition: util.h:402
bid_iterator pref_bid_
points to the next block to prefetch
Definition: util.h:119
unsigned_type prefetch_pages_
number of pages to prefetch
Definition: util.h:129
bid_iterator curr_bid_
points to the current block
Definition: util.h:115
unsigned_type i_value_
current value&#39;s index in the range of [0..#values per block[
Definition: util.h:330
#define STXXL_END_NAMESPACE
Definition: namespace.h:17
node< ValueType > * next()
return the next node, without the &quot;next&quot; flag.
Definition: util.h:49
bucket(NodeType *list, external_size_type n_external, internal_size_type i_block, internal_size_type i_subblock)
Definition: util.h:84
value_type & value()
Get reference to current value. The current value&#39;s block&#39;s dirty flag will be set.
Definition: util.h:199
void skip_to(bid_iterator bid, internal_size_type i_subblock)
Continue reading at given block and subblock.
Definition: util.h:265