14 #ifndef STXXL_CONTAINERS_HASH_MAP_HASH_MAP_HEADER
15 #define STXXL_CONTAINERS_HASH_MAP_HASH_MAP_HEADER
33 #define STXXL_VERBOSE_HASH_MAP(m) \
34 STXXL_VERBOSE1("hash_map[" << static_cast<const void*>(this) << "]::" << m)
52 template <
class KeyType,
56 unsigned SubBlockSize = 4*1024,
57 unsigned SubBlocksPerBlock = 256,
58 class AllocatorType = std::allocator<std::pair<const KeyType, MappedType> >
63 typedef hash_map<KeyType, MappedType, HashType, KeyCompareType,
98 block_raw_size = SubBlocksPerBlock * SubBlockSize,
99 subblock_raw_size = SubBlockSize
104 subblocks_per_block = SubBlocksPerBlock,
187 block_cache_(
tuning::get_instance()->blockcache_size),
191 opt_load_factor_(0.875)
193 max_buffer_size_ = buffer_size /
sizeof(node_type);
209 template <
class InputIterator>
223 block_cache_(
tuning::get_instance()->blockcache_size),
227 opt_load_factor_(0.875)
229 max_buffer_size_ = buffer_size /
sizeof(node_type);
230 insert(begin, end, mem_to_sort);
249 {
return node_allocator_; }
265 reader_type reader(bids_.begin(), bids_.end(), block_cache_);
266 values_stream_type values(buckets_.begin(), buckets_.end(),
267 reader, bids_.begin(), *
this);
270 while (!values.empty())
312 if (buckets_.size() == 0)
313 _rebuild_buckets(128);
316 bucket_type&
bucket = buckets_[i_bucket];
317 node_type*
node = _find_key_internal(bucket, value.first);
320 if (node && _eq(node->
value_.first, value.first))
322 bool old_deleted = node->
deleted();
329 return std::pair<iterator, bool>(
331 0, src_internal,
false, value.first), old_deleted);
337 = _find_key_external(bucket, value.first);
343 if (i_external < bucket.
n_external_ && _eq(ext_value.first, value.first))
345 return std::pair<iterator, bool>(
347 i_external, src_external,
true, value.first),
false);
353 node_type* new_node =
356 : (bucket.
list_ = _new_node(value, bucket.
list_,
false));
358 iterator it(
this, i_bucket, new_node,
359 0, src_internal,
false, value.first);
362 if (buffer_size_ >= max_buffer_size_)
365 return std::pair<iterator, bool>(it,
true);
377 bucket_type&
bucket = buckets_[i_bucket];
378 node_type*
node = _find_key_internal(bucket, value.first);
381 if (node && _eq(node->
value_.first, value.first))
388 return iterator(
this, i_bucket, node,
389 0, src_internal,
false, value.first);
397 node_type* new_node =
400 : (bucket.
list_ = _new_node(value, bucket.
list_,
false));
407 iterator it(
this, i_bucket, new_node,
408 0, src_internal,
false, value.first);
411 if (buffer_size_ >= max_buffer_size_)
425 if (it.
source_ == src_internal)
432 node_type*
node = _find_key_internal(bucket, it.
key_);
433 assert(!node || !_eq(node->
value_.first, it.
key_));
444 if (buffer_size_ >= max_buffer_size_)
455 bucket_type&
bucket = buckets_[i_bucket];
456 node_type*
node = _find_key_internal(bucket, key);
459 if (node && _eq(node->
value_.first, key))
475 = _find_key_external(bucket, key);
481 if (i_external < bucket.
n_external_ && _eq(ext_value.first, key))
493 if (buffer_size_ >= max_buffer_size_)
509 bucket_type&
bucket = buckets_[i_bucket];
510 node_type*
node = _find_key_internal(bucket, key);
513 if (node && _eq(node->
value_.first, key))
536 if (buffer_size_ >= max_buffer_size_)
547 block_cache_.
flush();
548 block_cache_.
clear();
552 i_bucket < buckets_.size(); i_bucket++)
554 _erase_nodes(buckets_[i_bucket].list_, NULL);
555 buckets_[i_bucket] = bucket_type();
572 std::swap(bids_, obj.
bids_);
579 std::swap(hash_, obj.
hash_);
580 std::swap(cmp_, obj.
cmp_);
604 n_subblocks_loaded = n_found_external = n_found_internal = n_not_found = 0;
610 o <<
"Find-statistics:" << std::endl;
611 o <<
" Found internal : " << n_found_internal << std::endl;
612 o <<
" Found external : " << n_found_external << std::endl;
613 o <<
" Not found : " << n_not_found << std::endl;
614 o <<
" Subblocks searched : " << n_subblocks_loaded << std::endl;
624 if (buffer_size_ + 1 >= max_buffer_size_)
628 bucket_type&
bucket = buckets_[i_bucket];
629 node_type*
node = _find_key_internal(bucket, key);
632 if (node && _eq(node->
value_.first, key)) {
635 return this->_end<iterator>();
637 return iterator(
this, i_bucket, node, 0, src_internal,
false, key);
642 = _find_key_external(bucket, key);
648 if (i_external < bucket.
n_external_ && _eq(value.first, key)) {
658 node_type* new_node =
661 : (bucket.
list_ = _new_node(value, bucket.
list_,
false));
667 return iterator(
this, i_bucket, new_node, i_external + 1, src_internal,
true, key);
672 return this->_end<iterator>();
682 const bucket_type&
bucket = buckets_[i_bucket];
683 node_type*
node = _find_key_internal(bucket, key);
686 if (node && _eq(node->
value_.first, key)) {
689 return this->_end<const_iterator>();
696 = _find_key_external(bucket, key);
702 if (i_external < bucket.
n_external_ && _eq(value.first, key)) {
709 return this->_end<const_iterator>();
720 return (cit == end()) ? 0 : 1;
729 return std::pair<iterator, iterator>(it, it);
738 return std::pair<const_iterator, const_iterator>(cit, cit);
745 if (buffer_size_ + 1 >= max_buffer_size_)
749 bucket_type&
bucket = buckets_[i_bucket];
750 node_type*
node = _find_key_internal(bucket, key);
753 if (node && _eq(node->
value_.first, key)) {
759 return node->
value_.second;
764 = _find_key_external(bucket, key);
770 (i_external < bucket.
n_external_ && _eq(found_value.first, key))
777 node_type* new_node =
779 ? node->
set_next(_new_node(buffer_value, node->
next(),
false))
780 : (bucket.
list_ = _new_node(buffer_value, bucket.
list_,
false));
785 return new_node->
value_.second;
791 {
return buckets_.size(); }
799 {
return _bkt_num(k); }
804 {
return (
float)num_total_ / ((float)subblock_size * (
float)buckets_.size()); }
812 opt_load_factor_ = z;
813 if (load_factor() > opt_load_factor_)
827 return buffer_size_ *
sizeof(node_type);
833 return max_buffer_size_ *
sizeof(node_type);
840 max_buffer_size_ = buffer_size /
sizeof(node_type);
841 if (buffer_size_ >= max_buffer_size_)
847 template <
class Iterator>
852 if (buckets_.size() == 0)
853 return _end<Iterator>();
856 Iterator it(non_const_this, 0, buckets_[0].list_,
865 template <
class Iterator>
869 return Iterator(non_const_this);
889 return node_allocator_.allocate(1);
895 node_allocator_.deallocate(node, 1);
902 node_type*
node = _get_node();
913 node_type*
curr = first;
916 node_type* next = curr->
next();
925 return _bkt_num(key, buckets_.size());
957 node_type* old = NULL;
959 curr && _leq(
curr->value_.first, key);
974 subblock_type* subblock;
984 i_subblock < n_subblocks; i_subblock++)
986 subblock = _load_subblock(bucket, i_subblock);
989 (i_subblock + 1 < n_subblocks)
998 if (_lt((*subblock)[n_values - 1].first, key))
1003 while (i_lower + 1 != i_upper)
1006 if (_leq((*subblock)[i_middle].first, key))
1014 if (_eq(value.first, key))
1016 (i_subblock * subblock_size + i_lower, value);
1035 n_subblocks_loaded++;
1046 return block_cache_.
get_subblock(bid, i_subblock_within);
1055 {
return hvalue.
value_; }
1063 template <
class InputStream,
class ValueExtractor>
1080 i_bucket_(i_bucket),
1084 empty_ = find_next();
1094 empty_ = find_next();
1102 if (map_->_bkt_num(vextract_(value_).first) != i_bucket_)
1117 typedef HashingStream<values_stream_type, HashedValueExtractor> hashing_stream_type;
1119 const int_type write_buffer_size = config::get_instance()->disks_number() * 4;
1123 n_new = (
internal_size_type)ceil((
double)num_total_ / ((double)subblock_size * (
double)opt_load_factor()));
1126 if (n_desired > n_new)
1127 n_new = std::min<internal_size_type>(n_desired, max_bucket_count());
1130 buckets_container_type old_buckets(n_new);
1131 std::swap(buckets_, old_buckets);
1133 bid_container_type old_bids;
1134 std::swap(bids_, old_bids);
1140 =
new reader_type(old_bids.begin(), old_bids.end(), block_cache_);
1142 values_stream_type values_stream(old_buckets.begin(), old_buckets.end(),
1143 *reader, old_bids.begin(), *
this);
1145 writer_type writer(&bids_, write_buffer_size, write_buffer_size / 2);
1154 i_bucket < buckets_.size(); i_bucket++)
1156 buckets_[i_bucket] = bucket_type();
1157 buckets_[i_bucket].i_block_ = writer.i_block();
1158 buckets_[i_bucket].i_subblock_ = writer.i_subblock();
1160 hashing_stream_type
hasher(values_stream, i_bucket, HashedValueExtractor(),
this);
1162 while (!hasher.empty())
1164 const hashed_value_type& hvalue = *hasher;
1167 writer.append(hvalue.
value_);
1172 writer.finish_subblock();
1173 buckets_[i_bucket].n_external_ = hasher.bucket_size_;
1174 num_total_ += hasher.bucket_size_;
1180 block_cache_.
clear();
1187 i_bucket < old_buckets.size(); i_bucket++)
1189 _erase_nodes(old_buckets[i_bucket].list_, NULL);
1190 old_buckets[i_bucket] = bucket_type();
1202 template <
class InputStream>
1210 : map_(map), in_(input)
1213 bool empty()
const {
return in_.empty(); }
1221 while (!in_.empty() && v_old.first == (*in_).first)
1226 template <
class InputStream>
1230 typedef std::pair<internal_size_type, typename InputStream::value_type>
value_type;
1235 : map_(map), in_(input)
1238 bool empty()
const {
return in_.empty(); }
1241 {
return value_type(map_.hash_((*in_).first), *in_); }
1252 const value_type& operator () (std::pair<internal_size_type, value_type>& v)
1253 {
return v.second; }
1261 struct Cmp :
public std::binary_function<
1262 std::pair<internal_size_type, value_type>,
1263 std::pair<internal_size_type, value_type>, bool
1269 bool operator () (
const std::pair<internal_size_type, value_type>& a,
1270 const std::pair<internal_size_type, value_type>& b)
const
1272 return (a.first < b.first) ||
1273 ((a.first == b.first) && map_.cmp_(a.second.first, b.second.first));
1277 return std::pair<internal_size_type, value_type>(
1284 return std::pair<internal_size_type, value_type>(
1296 template <
class InputIterator>
1302 typedef HashingStream<old_values_stream, HashedValueExtractor> old_hashing_stream;
1308 typedef AddHashStream<input_stream> new_values_stream;
1312 typedef UniqueValueStream<new_sorted_values_stream> new_unique_values_stream;
1314 typedef HashingStream<new_unique_values_stream, StripHashFunctor> new_hashing_stream;
1318 int_type write_buffer_size = config::get_instance()->disks_number() * 2;
1323 if (n_buckets_new > max_bucket_count())
1324 n_buckets_new = max_bucket_count();
1330 std::swap(buckets_, old_buckets);
1332 bid_container_type old_bids;
1333 std::swap(bids_, old_bids);
1336 reader_type* reader =
new reader_type(old_bids.begin(), old_bids.end(),
1338 old_values_stream old_values(old_buckets.begin(), old_buckets.end(),
1339 *reader, old_bids.begin(), *
this);
1343 new_values_stream new_values(input, *
this);
1344 new_sorted_values_stream new_sorted_values(new_values, Cmp(*
this), mem);
1345 new_unique_values_stream new_unique_values(new_sorted_values, *
this);
1347 writer_type writer(&bids_, write_buffer_size, write_buffer_size / 2);
1352 buckets_[i_bucket] = bucket_type();
1353 buckets_[i_bucket].i_block_ = writer.i_block();
1354 buckets_[i_bucket].i_subblock_ = writer.i_subblock();
1356 old_hashing_stream old_hasher(old_values, i_bucket, HashedValueExtractor(),
this);
1357 new_hashing_stream new_hasher(new_unique_values, i_bucket, StripHashFunctor(),
this);
1361 while (!old_hasher.empty() && !new_hasher.empty())
1365 key_type old_key = (*old_hasher).value_.first;
1366 key_type new_key = (*new_hasher).second.first;
1369 if ((old_hash < new_hash) || (old_hash == new_hash && cmp_(old_key, new_key)))
1371 const hashed_value_type& hvalue = *old_hasher;
1373 writer.append(hvalue.
value_);
1379 if (_eq(old_key, new_key))
1381 const hashed_value_type& hvalue = *old_hasher;
1385 writer.append((*new_hasher).second);
1391 while (!old_hasher.empty())
1393 const hashed_value_type& hvalue = *old_hasher;
1395 writer.append(hvalue.
value_);
1400 while (!new_hasher.empty())
1402 writer.append((*new_hasher).second);
1407 writer.finish_subblock();
1408 buckets_[i_bucket].n_external_ = bucket_size;
1409 num_total_ += bucket_size;
1413 block_cache_.
clear();
1421 i_bucket < old_buckets.size(); i_bucket++)
1423 _erase_nodes(old_buckets[i_bucket].list_, NULL);
1424 old_buckets[i_bucket] = bucket_type();
1440 return (hash_a < hash_b) ||
1441 ((hash_a == hash_b) && cmp_(a, b));
1454 {
return !cmp_(a, b) && !cmp_(b, a); }
1466 reader_type reader(bids_.begin(), bids_.end(), &block_cache_);
1469 std::cout <<
"block " << i_block <<
":\n";
1471 for (
internal_size_type i_subblock = 0; i_subblock < subblocks_per_block; i_subblock++) {
1472 std::cout <<
" subblock " << i_subblock <<
":\n ";
1474 for (
external_size_type i_element = 0; i_element < subblocks_per_block; i_element++) {
1475 std::cout << reader.const_value().first <<
", ";
1478 std::cout << std::endl;
1485 reader_type reader(bids_.begin(), bids_.end(), &block_cache_);
1487 std::cout <<
"number of buckets: " << buckets_.size() << std::endl;
1489 const bucket_type&
bucket = buckets_[i_bucket];
1492 std::cout <<
" bucket " << i_bucket <<
": block=" << bucket.
i_block_ <<
", subblock=" << bucket.
i_subblock_ <<
", external=" << bucket.
n_external_ << std::endl;
1495 std::cout <<
" internal_list=";
1497 std::cout << node->
value_.first <<
" (del=" << node->
deleted() <<
"), ";
1498 node = node->
next();
1500 std::cout << std::endl;
1502 std::cout <<
" external=";
1504 std::cout << reader.const_value().first <<
", ";
1507 std::cout << std::endl;
1513 std::cout <<
"number of buckets: " << buckets_.size() << std::endl;
1515 const bucket_type&
bucket = buckets_[i_bucket];
1516 std::cout <<
" bucket " << i_bucket <<
": block=" << bucket.
i_block_ <<
", subblock=" << bucket.
i_subblock_ <<
", external=" << bucket.
n_external_ <<
", list=" << bucket.
list_ << std::endl;
1523 struct equal_to :
public std::binary_function<key_type, key_type, bool>
1534 return m_map._eq(x, y);
1551 return equal_to(*
this);
1565 const bucket_type& b = buckets_[i_bucket];
1573 double avg_external = (double)sum_external / (
double)buckets_.size();
1574 double std_external = sqrt(((
double)square_sum_external / (
double)buckets_.size()) - (avg_external * avg_external));
1576 o <<
"Bucket count : " << buckets_.size() << std::endl;
1577 o <<
"Values total : " << num_total_ << std::endl;
1578 o <<
"Values buffered : " << buffer_size_ << std::endl;
1579 o <<
"Max Buffer-Size : " << max_buffer_size_ << std::endl;
1580 o <<
"Max external/bucket : " << max_external << std::endl;
1581 o <<
"Avg external/bucket : " << avg_external << std::endl;
1582 o <<
"Std external/bucket : " << std_external << std::endl;
1583 o <<
"Load-factor : " << load_factor() << std::endl;
1584 o <<
"Blocks allocated : " << bids_.size() <<
" => " << (bids_.size() * block_type::raw_size) <<
" bytes" << std::endl;
1585 o <<
"Bytes per value : " << ((double)(bids_.size() * block_type::raw_size) / (
double)num_total_) << std::endl;
1595 template <
class KeyType,
class MappedType,
class HashType,
class KeyCompareType,
1596 unsigned SubBlockSize,
unsigned SubBlocksPerBlock,
class AllocType>
1598 SubBlockSize, SubBlocksPerBlock, AllocType>& a,
1600 SubBlockSize, SubBlocksPerBlock, AllocType>& b)
1608 #endif // !STXXL_CONTAINERS_HASH_MAP_HASH_MAP_HEADER
UniqueValueStream(InputStream &input, self_type &map)
iterator2stream< InputIterator > streamify(InputIterator begin, InputIterator end)
Input iterator range to stream converter.
void print_statistics(std::ostream &o=std::cout) const
Print short general statistics to output stream.
external_size_type n_not_found
Additional information about a stored value:
internal_size_type i_bucket_
index of current bucket
KeyType key_type
type of the keys being used
AddHashStream(InputStream &input, self_type &map)
void _put_node(node_type *node)
Free given node.
void rehash(internal_size_type n=0)
Rehash with (at least) n buckets.
void erase(const_iterator it)
Erase value by iterator.
second_type second
Second tuple component.
void fix_iterators_all2end()
Update all iterators and make them point to the end of the hash-map (used by clear()) ...
void reset_statistics()
Reset all counters to zero.
iterator_map< self_type > iterator_map_type
for tracking active iterators
void insert(InputIterator f, InputIterator l, internal_size_type mem)
Bulk-insert of values in the range [f, l)
internal_size_type buffer_size_
size of internal-memory buffer in number of entries
void print_statistics(std::ostream &o=std::cout) const
external_size_type bucket_size_
key_type first_argument_type
C++11 required type.
internal_size_type i_bucket_
std::vector< bid_type > bid_container_type
container for block-bids
A model of stream that retrieves the data from an input iterator. For convenience use streamify funct...
internal_size_type buffer_size() const
Number of bytes occupied by buffer.
float opt_load_factor() const
Get desired load-factor.
iterator begin()
Returns an iterator pointing to the beginning of the hash-map.
Will return from its input-stream all values that are to be stored in the given bucket.
Iterator _begin() const
iterator pointing to the beginnning of the hash-map
void print_load_statistics(std::ostream &o=std::cout) const
Even more statistics: Number of buckets, number of values, buffer-size, values per bucket...
bool deleted()
check if the next node is deleted.
node< value_type > node_type
nodes for internal-memory buffer
key_type second_argument_type
C++11 required type.
equal_to(const self_type &map)
constructor requires reference to hash_map
hash_map_const_iterator< self_type > const_iterator
external_size_type max_size() const
The hash-map may store up to this number of values.
node_type * _find_key_internal(const bucket_type &bucket, const key_type &key) const
Locate the given key in the internal-memory chained list.
static uint_pair min()
return an uint_pair instance containing the smallest value possible
subblock_type * get_subblock(const bid_type &bid, unsigned_type i_subblock)
Retrieve a subblock from the cache. If not yet cached, only the subblock will be loaded.
const self_type & m_map
reference to hash_map
void max_buffer_size(internal_size_type buffer_size)
Set maximum buffer size.
key_equal key_eq() const
Constructed equality predicate used by this hash-map.
hash_map(internal_size_type n=0, const hasher &hf=hasher(), const key_compare &cmp=key_compare(), internal_size_type buffer_size=128 *1024 *1024, const allocator_type &a=allocator_type())
Construct a new hash-map.
first_type first
First tuple component.
void fix_iterators_2ext(internal_size_type i_bucket_old, const key_type &key, internal_size_type i_bucket_new, external_size_type i_ext)
Update iterators with given key and bucket and make them point to the specified location in external ...
bool _gt(const key_type &a, const key_type &b) const
true iff a > b
hasher hash_function() const
Hash-function used by this hash-map.
uint_pair & operator++()
prefix increment operator (directly manipulates the integer parts)
node_allocator_type node_allocator_
used to allocate new nodes for internal buffer
block_type::bid_type bid_type
block-identifier for blocks
tuple< external_size_type, value_type > _find_key_external(const bucket_type &bucket, const key_type &key) const
Search for key in external part of bucket.
external_size_type num_total_
(estimated) number of values
stxxl::internal_size_type internal_size_type
iterator end()
Returns an iterator pointing to the end of the hash-map.
void opt_load_factor(float z)
Set desired load-factor.
HashedValue< self_type > hashed_value_type
void fix_iterators_2int(internal_size_type i_bucket, const key_type &key, node_type *node)
Update iterators with given key and bucket and make them point to the specified node in internal memo...
buffered_reader< block_cache_type, bid_iterator_type > reader_type
Comparator object for values as required by stxxl::sort.
block_cache_type block_cache_
internal_size_type i_subblock_
index of first subblock
bool result_type
C++11 required type.
void _dump_bucket_statistics()
const_iterator begin() const
Returns a const_interator pointing to the beginning of the hash-map.
void delete_blocks(const BIDIteratorClass &bidbegin, const BIDIteratorClass &bidend)
Deallocates blocks.
Tuning parameters for external memory hash map.
std::pair< internal_size_type, typename InputStream::value_type > value_type
(hash,value)
Extracts the value-part (ignoring the hashvalue); required by HashingStream (see above) ...
block_cache< block_type > block_cache_type
bool _leq(const key_type &a, const key_type &b) const
true iff a <= b
Block containing elements of fixed length.
key_compare key_cmp() const
Strict-weak-ordering used by this hash-map.
Stream for filtering duplicates.
internal_size_type _bkt_num(const key_type &key, internal_size_type n) const
Bucket-index for values with given key.
const_iterator end() const
Returns a const_iterator pointing to the end of the hash-map.
bool oblivious_
false if the total-number of values is correct (false) or true if estimated (true); see *oblivious_-m...
external_size_type erase(const key_type &key)
Erase value by key; check external memory.
void swap(self_type &obj)
Exchange stored values with another hash-map.
equal_to key_equal
Type of constructed equality predicate.
void _make_conscious()
After using *oblivious_-methods only an estimate for the total number of elements can be given...
HashType hasher
type of (mother) hash-function
void _rebuild_buckets(internal_size_type n_desired=0)
choose_int_types< my_pointer_size >::int_type int_type
AllocatorType allocator_type
allocator template type
internal_size_type max_buffer_size() const
Maximum buffer size in byte.
bid_container_type bids_
blocks-ids of allocated blocks
typed_block< subblock_raw_size, value_type > subblock_type
a subblock consists of subblock_size values
bucket< node_type > bucket_type
buckets
static instance_pointer get_instance()
unsigned_type internal_size_type
void _erase_nodes(node_type *first, node_type *last)
Free nodes in range [first, last). If last is NULL all nodes will be freed.
std::pair< iterator, iterator > equal_range(const key_type &key)
Finds a range containing all values with given key. Non-const access.
const value_type & const_reference
type for constant value-references
std::pair< const_iterator, const_iterator > equal_range(const key_type &key) const
Finds a range containing all values with given key. Const access.
external_size_type n_found_external
internal_size_type max_buffer_size_
maximum size for internal-memory buffer
std::vector< bucket_type > buckets_container_type
internal_size_type i_block_
index of first block's bid (to be used as index for hash_map's bids_-array
bool _geq(const key_type &a, const key_type &b) const
true iff a >= b
std::pair< internal_size_type, value_type > max_value() const
#define STXXL_BEGIN_NAMESPACE
internal_size_type _bkt_num(const key_type &key) const
Bucket-index for values with given key.
external_size_type n_external_
number of elements in external memory
InputStream::value_type value_type
key_compare cmp_
user supplied strict-weak-ordering for keys
value_type & reference
type for value-references
void clear()
Empty cache; don't write back dirty blocks.
std::pair< KeyType, MappedType > value_type
actually store (key-data)-pairs
hash_map_iterator< self_type > iterator
Produces sorted stream from input stream.
float load_factor() const
Average number of (sub)blocks occupied by a bucket.
internal_size_type bucket_index(const key_type &k) const
Bucket-index for values with given key.
node_type * _get_node()
Allocate a new buffer-node.
static uint_pair max()
return an uint_pair instance containing the largest value possible
internal_size_type i_bucket_
Stream interface for all value-pairs currently stored in the map.
bool set_deleted(bool d)
change deleted flag on the next node
Construct an equality predicate from the comparison operator.
NodeType * list_
entry point to the chain in internal memory
Iterator _end() const
iterator pointing to the end of the hash-map (iterator-type as template-parameter) ...
#define STXXL_VERBOSE_HASH_MAP(m)
hash_map(InputIterator begin, InputIterator end, internal_size_type mem_to_sort=256 *1024 *1024, internal_size_type n=0, const hasher &hf=hasher(), const key_compare &cmp=key_compare(), internal_size_type buffer_size=128 *1024 *1024, const allocator_type &a=allocator_type())
Construct a new hash-map and insert all values in the range [f,l)
KeyCompareType key_compare
functor that imposes a ordering on keys (but see _lt())
float opt_load_factor_
desired load factor after rehashing
allocator_type::template rebind< node_type >::other node_allocator_type
node_type * _new_node(const value_type &value, node_type *nxt, bool del)
Allocate a new buffer-node and initialize with given value, node and deleted-flag.
external_size_type size() const
Number of values currently stored. Note: If the correct number is currently unknown (because *_oblivo...
internal_size_type bucket_count() const
Number of buckets.
void flush()
Write all dirty blocks back to disk.
buckets_container_type buckets_
array of bucket
bool _lt(const key_type &a, const key_type &b) const
iterator find(const key_type &key)
Look up value by key. Non-const access.
uint64 external_size_type
Used to scan external memory with prefetching.
Main implementation of external memory hash map.
subblock_type * _load_subblock(const bucket_type &bucket, internal_size_type which_subblock) const
Load the given bucket's i-th subblock.
allocator_type get_allocator() const
Get node memory allocator.
source_type source_
source of current value: external or internal
typed_block< block_raw_size, subblock_type > block_type
a block consists of block_size subblocks
stxxl::external_size_type external_size_type
void reset_statistics()
Reset hash-map statistics.
bid_container_type::iterator bid_iterator_type
iterator for block-bids
InputStream::value_type value_type
HashingStream(InputStream &input, internal_size_type i_bucket, ValueExtractor vextract, self_type *map)
std::pair< internal_size_type, value_type > min_value() const
External associative container (map). Introduction to map container: see STXXL Map (B+-tree) tutor...
void clear()
Reset hash-map: erase all values, invalidate all iterators.
bool _eq(const key_type &a, const key_type &b) const
true iff a == b. note: it is mandatory that equal keys yield equal hash-values => hashing not neccess...
node_type * node_
current (source=internal) or old (src=external) internal node
subblock_type::bid_type subblock_bid_type
block-identifier for subblocks
value_type const * const_pointer
const pointer to type of keys
external_size_type count(const key_type &k) const
Number of values with given key.
MappedType mapped_type
type of the data to be stored
external_size_type n_found_internal
void print_statistics(std::ostream &o=std::cout) const
Print statistics: Number of hits/misses, blocks forced from cache or written back.
node< ValueType > * set_next(node< ValueType > *n)
change the "next" value of next node pointer
const_iterator find(const key_type &key) const
Look up value by key. Const access.
internal_size_type max_bucket_count() const
Maximum number of buckets.
Buffered writing of values. New Blocks are allocated as needed.
value_type * pointer
pointer to type of keys
external_size_type n_subblocks_loaded
ExtIterator find(ExtIterator begin, ExtIterator end, const EqualityComparable &value, int_type nbuffers=0)
External equivalent of std::find, see stxxl::find.
std::pair< iterator, bool > insert(const value_type &value)
Insert a new value if no value with the same key is already present; external memory must therefore b...
key_type key_
key of current value
iterator_map_type iterator_map_
keeps track of all active iterators
iterator insert_oblivious(const value_type &value)
Insert a value; external memory is not accessed so that another value with the same key may be overwr...
hash_map< KeyType, MappedType, HashType, KeyCompareType, SubBlockSize, SubBlocksPerBlock > self_type
hasher hash_
user supplied mother hash-function
#define STXXL_END_NAMESPACE
stxxl::int64 difference_type
node< ValueType > * next()
return the next node, without the "next" flag.
void fix_iterators_2end(internal_size_type i_bucket, const key_type &key)
Update iterators with given key and bucket and make them point to the end of the hash-map (called by ...
void erase_oblivious(const key_type &key)
Erase value by key but without looking at external memory.
bool empty() const
Check if container is empty.