STXXL  1.4-dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
block_cache.h
Go to the documentation of this file.
1 /***************************************************************************
2  * include/stxxl/bits/containers/hash_map/block_cache.h
3  *
4  * Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  * Copyright (C) 2007 Markus Westphal <[email protected]>
7  * Copyright (C) 2014 Timo Bingmann <[email protected]>
8  *
9  * Distributed under the Boost Software License, Version 1.0.
10  * (See accompanying file LICENSE_1_0.txt or copy at
11  * http://www.boost.org/LICENSE_1_0.txt)
12  **************************************************************************/
13 
14 #ifndef STXXL_CONTAINERS_HASH_MAP_BLOCK_CACHE_HEADER
15 #define STXXL_CONTAINERS_HASH_MAP_BLOCK_CACHE_HEADER
16 
17 #ifdef STXXL_BOOST_CONFIG
18  #include <boost/config.hpp>
19 #endif
20 
21 #include <stxxl/bits/noncopyable.h>
25 
26 #include <vector>
27 #include <list>
28 
30 
31 namespace hash_map {
32 
33 //! Used inside block_cache for buffering write requests of cached blocks.
34 template <class BlockType>
36 {
37 public:
38  typedef BlockType block_type;
39  typedef typename block_type::bid_type bid_type;
40 
41 protected:
42  std::vector<block_type*> blocks_;
43  std::vector<request_ptr> reqs_;
44  std::vector<unsigned_type> free_blocks_;
45  std::list<unsigned_type> busy_blocks_; // TODO make that a circular-buffer
46 
47 public:
49  {
50  blocks_.reserve(size);
51  free_blocks_.reserve(size);
52  reqs_.resize(size);
53 
54  for (unsigned_type i = 0; i < size; i++) {
55  blocks_.push_back(new block_type());
56  free_blocks_.push_back(i);
57  }
58  }
59 
60  //! Writes the given block back to disk;
61  //! callers have to exchange the passed block with the returned one!
62  block_type * write(block_type* write_block, const bid_type& bid)
63  {
64  if (free_blocks_.empty()) {
65  unsigned_type i_buffer = busy_blocks_.front();
66  busy_blocks_.pop_front();
67 
68  if (reqs_[i_buffer].valid())
69  reqs_[i_buffer]->wait();
70 
71  free_blocks_.push_back(i_buffer);
72  }
73 
74  unsigned_type i_buffer = free_blocks_.back();
75  free_blocks_.pop_back();
76  block_type* buffer = blocks_[i_buffer];
77 
78  blocks_[i_buffer] = write_block;
79  reqs_[i_buffer] = blocks_[i_buffer]->write(bid);
80  busy_blocks_.push_back(i_buffer);
81 
82  return buffer;
83  }
84 
85  void flush()
86  {
87  while (!busy_blocks_.empty()) {
88  unsigned_type i_buffer = busy_blocks_.front();
89  busy_blocks_.pop_front();
90  if (reqs_[i_buffer].valid())
91  reqs_[i_buffer]->wait();
92  }
93  busy_blocks_.clear();
94  free_blocks_.clear();
95  for (unsigned_type i = 0; i < blocks_.size(); i++)
96  free_blocks_.push_back(i);
97  }
98 
100  {
101  std::swap(blocks_, obj.blocks_);
102  std::swap(reqs_, obj.reqs_);
103  std::swap(free_blocks_, obj.free_blocks_);
104  std::swap(busy_blocks_, obj.busy_blocks_);
105  }
106 
108  {
109  flush();
110  for (unsigned_type i = 0; i < blocks_.size(); i++)
111  delete blocks_[i];
112  }
113 };
114 
115 //! Cache of blocks contained in an external memory hash map. Uses the
116 //! stxxl::lru_pager as eviction algorithm.
117 template <class BlockType>
118 class block_cache : private noncopyable
119 {
120 public:
121  typedef BlockType block_type;
122  typedef typename block_type::bid_type bid_type;
123  typedef typename block_type::value_type subblock_type;
124  typedef typename subblock_type::bid_type subblock_bid_type;
125 
126 protected:
127  struct bid_eq
128  {
129  bool operator () (const bid_type& a, const bid_type& b) const
130  {
131  return (a.storage == b.storage && a.offset == b.offset);
132  }
133  };
134 
135  struct bid_hash
136  {
137  size_t operator () (const bid_type& bid) const
138  {
139  return longhash1(bid.offset + reinterpret_cast<uint64>(bid.storage));
140  }
141 #ifdef STXXL_MSVC
142  bool operator () (const bid_type& a, const bid_type& b) const
143  {
144  return (a.storage < b.storage) ||
145  (a.storage == b.storage && a.offset < b.offset);
146  }
147  enum
148  { // parameters for hash table
149  bucket_size = 4, // 0 < bucket_size
150  min_buckets = 8 // min_buckets = 2 ^^ N, 0 < N
151  };
152 #endif
153  };
154 
157 
158  typedef typename compat_hash_map<bid_type, unsigned_type,
159  bid_hash>::result bid_map_type;
160 
161  enum { valid_all = block_type::size };
162 
164 
165  //! cached blocks
166  std::vector<block_type*> blocks_;
167  //! bids of cached blocks
168  std::vector<bid_type> bids_;
169  std::vector<unsigned_type> retain_count_;
170 
171  //! true iff block has been altered while in cache
172  std::vector<unsigned char> dirty_;
173 
174  //! valid_all or the actually loaded subblock's index
175  std::vector<unsigned_type> valid_subblock_;
176 
177  //! free blocks as indices to blocks_-vector
178  std::vector<unsigned_type> free_blocks_;
179  std::vector<request_ptr> reqs_;
180 
183 
184  /* statistics */
191 
192 public:
193  //! Construct a new block-cache.
194  //! \param cache_size cache-size in number of blocks
196  : write_buffer_(config::get_instance()->disks_number() * 2),
197  blocks_(cache_size),
198  bids_(cache_size),
199  retain_count_(cache_size),
200  dirty_(cache_size, false),
201  valid_subblock_(cache_size),
202  free_blocks_(cache_size),
203  reqs_(cache_size),
204  pager_(cache_size),
205  n_found(0),
206  n_not_found(0),
207  n_read(0),
208  n_written(0),
209  n_clean_forced(0),
210  n_wrong_subblock(0)
211  {
212  for (unsigned_type i = 0; i < cache_size; i++)
213  {
214  blocks_[i] = new block_type();
215  free_blocks_[i] = i;
216  }
217  }
218 
219  //! Return cache-size
221  {
222  return blocks_.size();
223  }
224 
226  {
227  STXXL_VERBOSE1("hash_map::block_cache destructor addr=" << this);
228 
229  for (typename bid_map_type::const_iterator i = bid_map_.begin();
230  i != bid_map_.end(); ++i)
231  {
232  const unsigned_type i_block = (*i).second;
233 
234  if (reqs_[i_block].valid())
235  reqs_[i_block]->wait();
236 
237  if (dirty_[i_block]) {
238  blocks_[i_block] =
239  write_buffer_.write(blocks_[i_block], bids_[i_block]);
240  }
241  }
242  write_buffer_.flush();
243 
244  for (unsigned_type i = 0; i < size(); ++i)
245  delete blocks_[i];
246  }
247 
248 protected:
249  //! Force a block from the cache; write back to disk if dirty
250  void kick_block()
251  {
252  unsigned_type i_block2kick;
253 
254  unsigned_type max_tries = size() + 1;
255  unsigned_type i = 0;
256  do
257  {
258  ++i;
259  i_block2kick = pager_.kick();
260  if (i == max_tries)
261  {
262  throw std::runtime_error(
263  "The block cache is too small,"
264  "no block can be kicked out (all blocks are retained)!"
265  );
266  }
267  pager_.hit(i_block2kick);
268  } while (retain_count_[i_block2kick] > 0);
269 
270  if (valid_subblock_[i_block2kick] == valid_all &&
271  reqs_[i_block2kick].valid())
272  {
273  reqs_[i_block2kick]->wait();
274  }
275 
276  if (dirty_[i_block2kick])
277  {
278  blocks_[i_block2kick] =
279  write_buffer_.write(blocks_[i_block2kick], bids_[i_block2kick]);
280  ++n_written;
281  }
282  else
283  ++n_clean_forced;
284 
285  bid_map_.erase(bids_[i_block2kick]);
286  free_blocks_.push_back(i_block2kick);
287  }
288 
289 public:
290  //! Retain a block in cache. Blocks, that are retained by at least one
291  //! client, won't get kicked. Make sure to release all retained blocks
292  //! again.
293  //!
294  //! \param bid block, whose retain-count is to be increased
295  //! \return true if block was cached, false otherwise
296  bool retain_block(const bid_type& bid)
297  {
298  typename bid_map_type::const_iterator it = bid_map_.find(bid);
299  if (it == bid_map_.end())
300  return false;
301 
302  unsigned_type i_block = (*it).second;
303  retain_count_[i_block]++;
304  return true;
305  }
306 
307  //! Release a block (decrement retain-count). If the retain-count reaches
308  //! 0, a block may be kicked again.
309  //!
310  //! \param bid block, whose retain-count is to be decremented
311  //! \return true if operation was successfull (block cached and
312  //! retain-count > 0), false otherwise
313  bool release_block(const bid_type& bid)
314  {
315  typename bid_map_type::const_iterator it = bid_map_.find(bid);
316  if (it == bid_map_.end())
317  return false;
318 
319  unsigned_type i_block = (*it).second;
320  if (retain_count_[i_block] == 0)
321  return false;
322 
323  retain_count_[i_block]--;
324  return true;
325  }
326 
327  //! Set given block's dirty-flag. Note: If the given block was only
328  //! partially loaded, it will be completely reloaded.
329  //!
330  //! \return true if block cached, false otherwise
331  bool make_dirty(const bid_type& bid)
332  {
333  typename bid_map_type::const_iterator it = bid_map_.find(bid);
334  if (it == bid_map_.end())
335  return false;
336 
337  unsigned_type i_block = (*it).second;
338 
339  // only complete blocks can be marked as dirty
340  if (valid_subblock_[i_block] != valid_all)
341  {
342  reqs_[i_block] = blocks_[i_block]->read(bid);
343  valid_subblock_[i_block] = valid_all;
344  }
345 
346  if (reqs_[i_block].valid()) {
347  if (reqs_[i_block]->poll() == false)
348  reqs_[i_block]->wait();
349  }
350 
351  dirty_[i_block] = true;
352  return true;
353  }
354 
355  //! Retrieve a subblock from the cache. If not yet cached, only the
356  //! subblock will be loaded.
357  //!
358  //! \param bid block, to which the requested subblock belongs
359  //! \param i_subblock index of requested subblock
360  //! \return pointer to subblock
362  {
363  block_type* block;
364  unsigned_type i_block;
365  n_read++;
366 
367  // block (partly) cached?
368  typename bid_map_type::const_iterator it = bid_map_.find(bid);
369  if (it != bid_map_.end())
370  {
371  i_block = (*it).second;
372  block = blocks_[i_block];
373 
374  // complete block or wanted subblock is in the cache
375  if (valid_subblock_[i_block] == valid_all ||
376  valid_subblock_[i_block] == i_subblock)
377  {
378  ++n_found;
379 
380  if (valid_subblock_[i_block] == valid_all &&
381  reqs_[i_block].valid())
382  {
383  // request not yet completed?
384  if (reqs_[i_block]->poll() == false)
385  reqs_[i_block]->wait();
386  }
387 
388  return &((*block)[i_subblock]);
389  }
390  // wrong subblock in cache
391  else
392  {
393  ++n_not_found;
394  ++n_wrong_subblock;
395  // actually loading the subblock will be done below
396 
397  // note: if a client still holds a reference to the "old"
398  // subblock, it will find its data to be still valid.
399  }
400  }
401  // block not cached
402  else
403  {
404  n_not_found++;
405 
406  if (free_blocks_.empty())
407  kick_block();
408 
409  i_block = free_blocks_.back(), free_blocks_.pop_back();
410  block = blocks_[i_block];
411 
412  bid_map_[bid] = i_block;
413  bids_[i_block] = bid;
414  dirty_[i_block] = false;
415  retain_count_[i_block] = 0;
416  }
417 
418  // now actually load the wanted subblock and store it within *block
419  subblock_bid_type subblock_bid(
420  bid.storage, bid.offset + i_subblock * subblock_type::raw_size
421  );
422  request_ptr req = ((*block)[i_subblock]).read(subblock_bid);
423  req->wait();
424 
425  valid_subblock_[i_block] = i_subblock;
426  pager_.hit(i_block);
427 
428  return &((*block)[i_subblock]);
429  }
430 
431  //! Load a block in advance.
432  //! \param bid Identifier of the block to load
433  void prefetch_block(const bid_type& bid)
434  {
435  unsigned_type i_block;
436 
437  // cached
438  typename bid_map_type::const_iterator it = bid_map_.find(bid);
439  if (it != bid_map_.end())
440  {
441  i_block = (*it).second;
442 
443  // complete block cached; we can finish here
444  if (valid_subblock_[i_block] == valid_all) {
445  pager_.hit(i_block);
446  return;
447  }
448 
449  // only a single subblock is cached; we have to load the
450  // complete block (see below)
451  }
452  // not even a subblock cached
453  else {
454  if (free_blocks_.empty())
455  kick_block();
456 
457  i_block = free_blocks_.back(), free_blocks_.pop_back();
458 
459  bid_map_[bid] = i_block;
460  bids_[i_block] = bid;
461  retain_count_[i_block] = 0;
462  dirty_[i_block] = false;
463  }
464 
465  // now actually load the block
466  reqs_[i_block] = blocks_[i_block]->read(bid);
467  valid_subblock_[i_block] = valid_all;
468  pager_.hit(i_block);
469  }
470 
471  //! Write all dirty blocks back to disk
472  void flush()
473  {
474  for (typename bid_map_type::const_iterator i = bid_map_.begin();
475  i != bid_map_.end(); ++i)
476  {
477  const unsigned_type i_block = (*i).second;
478  if (dirty_[i_block])
479  {
480  blocks_[i_block] =
481  write_buffer_.write(blocks_[i_block], bids_[i_block]);
482 
483  dirty_[i_block] = false;
484  }
485  }
486  write_buffer_.flush();
487  }
488 
489  //! Empty cache; don't write back dirty blocks
490  void clear()
491  {
492  free_blocks_.clear();
493  for (unsigned_type i = 0; i < size(); i++)
494  {
495  if (reqs_[i].valid()) {
496  reqs_[i]->cancel();
497  reqs_[i]->wait();
498  }
499 
500  free_blocks_.push_back(i);
501  }
502  bid_map_.clear();
503  }
504 
505  //! Print statistics: Number of hits/misses, blocks forced from cache or
506  //! written back.
507  void print_statistics(std::ostream& o = std::cout) const
508  {
509  o << "Blocks found : " << n_found << " (" << 100. * double(n_found) / double(n_read) << "%)" << std::endl;
510  o << "Blocks not found : " << n_not_found << std::endl;
511  o << "Blocks read : " << n_read << std::endl;
512  o << "Blocks written : " << n_written << std::endl;
513  o << "Clean blocks forced from the cache: " << n_clean_forced << std::endl;
514  o << "Wrong subblock cached : " << n_wrong_subblock << std::endl;
515  }
516 
517  //! Reset all counters to zero
519  {
520  n_found = 0;
521  n_not_found = 0;
522  n_read = 0;
523  n_written = 0;
524  n_clean_forced = 0;
525  n_wrong_subblock = 0;
526  }
527 
528  //! Exchange contents of two caches
529  //! \param obj cache to swap contents with
530  void swap(block_cache& obj)
531  {
532  write_buffer_.swap(obj.write_buffer_);
533  std::swap(blocks_, obj.blocks_);
534  std::swap(bids_, obj.bids_);
535  std::swap(retain_count_, obj.retain_count_);
536 
537  std::swap(dirty_, obj.dirty_);
538  std::swap(valid_subblock_, obj.valid_subblock_);
539 
540  std::swap(free_blocks_, obj.free_blocks_);
541  std::swap(reqs_, obj.reqs_);
542 
543  std::swap(bid_map_, obj.bid_map_);
544  std::swap(pager_, obj.pager_);
545 
546  std::swap(n_found, obj.n_found);
547  std::swap(n_not_found, obj.n_found);
548  std::swap(n_read, obj.n_read);
549  std::swap(n_written, obj.n_written);
550  std::swap(n_clean_forced, obj.n_clean_forced);
551  std::swap(n_wrong_subblock, obj.n_wrong_subblock);
552  }
553 
554 #if 0 // for debugging, requires data items to be ostream-able.
555 
556  //! Show currently cached blocks
557  void dump_cache(std::ostream& os) const
558  {
559  for (size_t i = 0; i < blocks_.size(); i++)
560  {
561  bid_type bid = bids_[i];
562  if (bid_map_.count(bid) == 0) {
563  os << "Block " << i << ": empty\n";
564  continue;
565  }
566 
567  os << "Block " << i << ": bid=" << bids_[i]
568  << " dirty=" << dirty_[i]
569  << " retain_count=" << retain_count_[i]
570  << " valid_subblock=" << valid_subblock_[i] << "\n";
571 
572  for (size_t k = 0; k < block_type::size; k++) {
573  os << " Subbblock " << k << ": ";
574  if (valid_subblock_[i] != valid_all && valid_subblock_[i] != k)
575  {
576  os << "not valid\n";
577  continue;
578  }
579  for (size_t l = 0; l < block_type::value_type::size; l++) {
580  os << "(" << (*blocks_[i])[k][l].first
581  << ", " << (*blocks_[i])[k][l].second << ") ";
582  }
583  os << std::endl;
584  }
585  }
586  }
587 #endif
588 };
589 
590 } // namespace hash_map
591 
593 
594 namespace std {
595 
596 template <class BlockType>
599 {
600  a.swap(b);
601 }
602 
603 template <class HashMap>
606 {
607  a.swap(b);
608 }
609 
610 } // namespace std
611 
612 #endif // !STXXL_CONTAINERS_HASH_MAP_BLOCK_CACHE_HEADER
std::vector< unsigned_type > free_blocks_
free blocks as indices to blocks_-vector
Definition: block_cache.h:178
bool make_dirty(const bid_type &bid)
Set given block&#39;s dirty-flag. Note: If the given block was only partially loaded, it will be complete...
Definition: block_cache.h:331
std::vector< bid_type > bids_
bids of cached blocks
Definition: block_cache.h:168
void reset_statistics()
Reset all counters to zero.
Definition: block_cache.h:518
long long int int64
Definition: types.h:38
std::vector< block_type * > blocks_
Definition: block_cache.h:42
size_t longhash1(uint64 key_)
Definition: utils.h:230
std::vector< request_ptr > reqs_
Definition: block_cache.h:43
block_cache_write_buffer< block_type > write_buffer_type
Definition: block_cache.h:156
block_cache_write_buffer(unsigned_type size)
Definition: block_cache.h:48
std::vector< unsigned char > dirty_
true iff block has been altered while in cache
Definition: block_cache.h:172
unsigned_type size() const
Return cache-size.
Definition: block_cache.h:220
subblock_type * get_subblock(const bid_type &bid, unsigned_type i_subblock)
Retrieve a subblock from the cache. If not yet cached, only the subblock will be loaded.
Definition: block_cache.h:361
bool retain_block(const bid_type &bid)
Retain a block in cache. Blocks, that are retained by at least one client, won&#39;t get kicked...
Definition: block_cache.h:296
void kick_block()
Force a block from the cache; write back to disk if dirty.
Definition: block_cache.h:250
bool release_block(const bid_type &bid)
Release a block (decrement retain-count). If the retain-count reaches 0, a block may be kicked again...
Definition: block_cache.h:313
std::vector< block_type * > blocks_
cached blocks
Definition: block_cache.h:166
block_type::value_type subblock_type
Definition: block_cache.h:123
compat_hash_map< bid_type, unsigned_type, bid_hash >::result bid_map_type
Definition: block_cache.h:159
Used inside block_cache for buffering write requests of cached blocks.
Definition: block_cache.h:35
virtual void wait(bool measure_time=true)=0
Suspends calling thread until completion of the request.
void swap(block_cache_write_buffer &obj)
Definition: block_cache.h:99
std::vector< unsigned_type > retain_count_
Definition: block_cache.h:169
std::list< unsigned_type > busy_blocks_
Definition: block_cache.h:45
#define STXXL_BEGIN_NAMESPACE
Definition: namespace.h:16
void clear()
Empty cache; don&#39;t write back dirty blocks.
Definition: block_cache.h:490
#define STXXL_VERBOSE1(x)
Definition: verbose.h:113
stxxl::lru_pager pager_type
Definition: block_cache.h:155
void swap(block_cache &obj)
Exchange contents of two caches.
Definition: block_cache.h:530
void prefetch_block(const bid_type &bid)
Load a block in advance.
Definition: block_cache.h:433
Pager with LRU replacement strategy.
Definition: pager.h:66
void flush()
Write all dirty blocks back to disk.
Definition: block_cache.h:472
Cache of blocks contained in an external memory hash map. Uses the stxxl::lru_pager as eviction algor...
Definition: block_cache.h:118
block_type::bid_type bid_type
Definition: block_cache.h:122
choose_int_types< my_pointer_size >::unsigned_type unsigned_type
Definition: types.h:64
Main implementation of external memory hash map.
Definition: hash_map.h:60
Access point to disks properties. Since 1.4.0: no config files are read automatically! ...
Definition: config.h:112
block_cache(unsigned_type cache_size)
Construct a new block-cache.
Definition: block_cache.h:195
void print_statistics(std::ostream &o=std::cout) const
Print statistics: Number of hits/misses, blocks forced from cache or written back.
Definition: block_cache.h:507
std::vector< unsigned_type > free_blocks_
Definition: block_cache.h:44
std::vector< unsigned_type > valid_subblock_
valid_all or the actually loaded subblock&#39;s index
Definition: block_cache.h:175
block_type * write(block_type *write_block, const bid_type &bid)
Writes the given block back to disk; callers have to exchange the passed block with the returned one!...
Definition: block_cache.h:62
std::vector< request_ptr > reqs_
Definition: block_cache.h:179
write_buffer_type write_buffer_
Definition: block_cache.h:163
#define STXXL_END_NAMESPACE
Definition: namespace.h:17
subblock_type::bid_type subblock_bid_type
Definition: block_cache.h:124