14 #ifndef STXXL_CONTAINERS_HASH_MAP_HASH_MAP_HEADER
15 #define STXXL_CONTAINERS_HASH_MAP_HASH_MAP_HEADER
33 #define STXXL_VERBOSE_HASH_MAP(m) \
34 STXXL_VERBOSE1("hash_map[" << static_cast<const void*>(this) << "]::" << m)
52 template <
class KeyType,
56 unsigned SubBlockSize = 4*1024,
57 unsigned SubBlocksPerBlock = 256,
58 class AllocatorType = std::allocator<std::pair<const KeyType, MappedType> >
63 typedef hash_map<KeyType, MappedType, HashType, KeyCompareType,
98 block_raw_size = SubBlocksPerBlock * SubBlockSize,
99 subblock_raw_size = SubBlockSize
104 subblocks_per_block = SubBlocksPerBlock,
187 block_cache_(
tuning::get_instance()->blockcache_size),
191 opt_load_factor_(0.875)
193 max_buffer_size_ = buffer_size /
sizeof(node_type);
209 template <
class InputIterator>
223 block_cache_(
tuning::get_instance()->blockcache_size),
227 opt_load_factor_(0.875)
229 max_buffer_size_ = buffer_size /
sizeof(node_type);
230 insert(begin, end, mem_to_sort);
249 {
return node_allocator_; }
265 reader_type reader(bids_.begin(), bids_.end(), block_cache_);
266 values_stream_type values(buckets_.begin(), buckets_.end(),
267 reader, bids_.begin(), *
this);
270 while (!values.empty())
312 if (buckets_.size() == 0)
313 _rebuild_buckets(128);
316 bucket_type&
bucket = buckets_[i_bucket];
317 node_type*
node = _find_key_internal(bucket, value.first);
320 if (node && _eq(node->
value_.first, value.first))
322 bool old_deleted = node->
deleted();
329 return std::pair<iterator, bool>(
331 0, src_internal,
false, value.first), old_deleted);
338 = _find_key_external(bucket, value.first);
344 if (i_external < bucket.
n_external_ && _eq(ext_value.first, value.first))
346 return std::pair<iterator, bool>(
348 i_external, src_external,
true, value.first),
false);
354 node_type* new_node =
357 : (bucket.
list_ = _new_node(value, bucket.
list_,
false));
359 iterator it(
this, i_bucket, new_node,
360 0, src_internal,
false, value.first);
363 if (buffer_size_ >= max_buffer_size_)
366 return std::pair<iterator, bool>(it,
true);
378 bucket_type&
bucket = buckets_[i_bucket];
379 node_type*
node = _find_key_internal(bucket, value.first);
382 if (node && _eq(node->
value_.first, value.first))
389 return iterator(
this, i_bucket, node,
390 0, src_internal,
false, value.first);
398 node_type* new_node =
401 : (bucket.
list_ = _new_node(value, bucket.
list_,
false));
408 iterator it(
this, i_bucket, new_node,
409 0, src_internal,
false, value.first);
412 if (buffer_size_ >= max_buffer_size_)
426 if (it.
source_ == src_internal)
433 node_type*
node = _find_key_internal(bucket, it.
key_);
434 assert(!node || !_eq(node->
value_.first, it.
key_));
445 if (buffer_size_ >= max_buffer_size_)
456 bucket_type&
bucket = buckets_[i_bucket];
457 node_type*
node = _find_key_internal(bucket, key);
460 if (node && _eq(node->
value_.first, key))
476 = _find_key_external(bucket, key);
482 if (i_external < bucket.
n_external_ && _eq(ext_value.first, key))
494 if (buffer_size_ >= max_buffer_size_)
510 bucket_type&
bucket = buckets_[i_bucket];
511 node_type*
node = _find_key_internal(bucket, key);
514 if (node && _eq(node->
value_.first, key))
537 if (buffer_size_ >= max_buffer_size_)
548 block_cache_.
flush();
549 block_cache_.
clear();
553 i_bucket < buckets_.size(); i_bucket++)
555 _erase_nodes(buckets_[i_bucket].list_, NULL);
556 buckets_[i_bucket] = bucket_type();
573 std::swap(bids_, obj.
bids_);
580 std::swap(hash_, obj.
hash_);
581 std::swap(cmp_, obj.
cmp_);
605 n_subblocks_loaded = n_found_external = n_found_internal = n_not_found = 0;
611 o <<
"Find-statistics:" << std::endl;
612 o <<
" Found internal : " << n_found_internal << std::endl;
613 o <<
" Found external : " << n_found_external << std::endl;
614 o <<
" Not found : " << n_not_found << std::endl;
615 o <<
" Subblocks searched : " << n_subblocks_loaded << std::endl;
625 if (buffer_size_ + 1 >= max_buffer_size_)
629 bucket_type&
bucket = buckets_[i_bucket];
630 node_type*
node = _find_key_internal(bucket, key);
633 if (node && _eq(node->
value_.first, key)) {
636 return this->_end<iterator>();
638 return iterator(
this, i_bucket, node, 0, src_internal,
false, key);
643 = _find_key_external(bucket, key);
649 if (i_external < bucket.
n_external_ && _eq(value.first, key)) {
659 node_type* new_node =
662 : (bucket.
list_ = _new_node(value, bucket.
list_,
false));
668 return iterator(
this, i_bucket, new_node, i_external + 1, src_internal,
true, key);
673 return this->_end<iterator>();
683 const bucket_type&
bucket = buckets_[i_bucket];
684 node_type*
node = _find_key_internal(bucket, key);
687 if (node && _eq(node->
value_.first, key)) {
690 return this->_end<const_iterator>();
697 = _find_key_external(bucket, key);
703 if (i_external < bucket.
n_external_ && _eq(value.first, key)) {
710 return this->_end<const_iterator>();
721 return (cit == end()) ? 0 : 1;
730 return std::pair<iterator, iterator>(it, it);
739 return std::pair<const_iterator, const_iterator>(cit, cit);
746 if (buffer_size_ + 1 >= max_buffer_size_)
750 bucket_type&
bucket = buckets_[i_bucket];
751 node_type*
node = _find_key_internal(bucket, key);
754 if (node && _eq(node->
value_.first, key)) {
760 return node->
value_.second;
765 = _find_key_external(bucket, key);
771 (i_external < bucket.
n_external_ && _eq(found_value.first, key))
778 node_type* new_node =
780 ? node->
set_next(_new_node(buffer_value, node->
next(),
false))
781 : (bucket.
list_ = _new_node(buffer_value, bucket.
list_,
false));
786 return new_node->
value_.second;
792 {
return buckets_.size(); }
800 {
return _bkt_num(k); }
805 {
return (
float)num_total_ / ((float)subblock_size * (
float)buckets_.size()); }
813 opt_load_factor_ = z;
814 if (load_factor() > opt_load_factor_)
828 return buffer_size_ *
sizeof(node_type);
834 return max_buffer_size_ *
sizeof(node_type);
841 max_buffer_size_ = buffer_size /
sizeof(node_type);
842 if (buffer_size_ >= max_buffer_size_)
848 template <
class Iterator>
853 if (buckets_.size() == 0)
854 return _end<Iterator>();
857 Iterator it(non_const_this, 0, buckets_[0].list_,
866 template <
class Iterator>
870 return Iterator(non_const_this);
890 return node_allocator_.allocate(1);
896 node_allocator_.deallocate(node, 1);
903 node_type*
node = _get_node();
914 node_type* curr = first;
917 node_type* next = curr->
next();
926 return _bkt_num(key, buckets_.size());
958 node_type* old = NULL;
959 for (node_type* curr = bucket.
list_;
960 curr && _leq(curr->value_.first, key);
975 subblock_type* subblock;
985 i_subblock < n_subblocks; i_subblock++)
987 subblock = _load_subblock(bucket, i_subblock);
990 (i_subblock + 1 < n_subblocks)
999 if (_lt((*subblock)[n_values - 1].first, key))
1004 while (i_lower + 1 != i_upper)
1007 if (_leq((*subblock)[i_middle].first, key))
1015 if (_eq(value.first, key))
1017 (i_subblock * subblock_size + i_lower, value);
1036 n_subblocks_loaded++;
1047 return block_cache_.
get_subblock(bid, i_subblock_within);
1056 {
return hvalue.
value_; }
1064 template <
class InputStream,
class ValueExtractor>
1081 i_bucket_(i_bucket),
1085 empty_ = find_next();
1095 empty_ = find_next();
1103 if (map_->_bkt_num(vextract_(value_).first) != i_bucket_)
1118 typedef HashingStream<values_stream_type, HashedValueExtractor> hashing_stream_type;
1120 const int_type write_buffer_size = config::get_instance()->disks_number() * 4;
1124 n_new = (
internal_size_type)ceil((
double)num_total_ / ((double)subblock_size * (
double)opt_load_factor()));
1127 if (n_desired > n_new)
1128 n_new = std::min<internal_size_type>(n_desired, max_bucket_count());
1131 buckets_container_type old_buckets(n_new);
1132 std::swap(buckets_, old_buckets);
1134 bid_container_type old_bids;
1135 std::swap(bids_, old_bids);
1141 =
new reader_type(old_bids.begin(), old_bids.end(), block_cache_);
1143 values_stream_type values_stream(old_buckets.begin(), old_buckets.end(),
1144 *reader, old_bids.begin(), *
this);
1146 writer_type writer(&bids_, write_buffer_size, write_buffer_size / 2);
1155 i_bucket < buckets_.size(); i_bucket++)
1157 buckets_[i_bucket] = bucket_type();
1158 buckets_[i_bucket].i_block_ = writer.i_block();
1159 buckets_[i_bucket].i_subblock_ = writer.i_subblock();
1161 hashing_stream_type
hasher(values_stream, i_bucket, HashedValueExtractor(),
this);
1163 while (!hasher.empty())
1165 const hashed_value_type& hvalue = *hasher;
1168 writer.append(hvalue.
value_);
1173 writer.finish_subblock();
1174 buckets_[i_bucket].n_external_ = hasher.bucket_size_;
1175 num_total_ += hasher.bucket_size_;
1181 block_cache_.
clear();
1188 i_bucket < old_buckets.size(); i_bucket++)
1190 _erase_nodes(old_buckets[i_bucket].list_, NULL);
1191 old_buckets[i_bucket] = bucket_type();
1203 template <
class InputStream>
1211 : map_(map), in_(input)
1214 bool empty()
const {
return in_.empty(); }
1222 while (!in_.empty() && v_old.first == (*in_).first)
1227 template <
class InputStream>
1231 typedef std::pair<internal_size_type, typename InputStream::value_type>
value_type;
1236 : map_(map), in_(input)
1239 bool empty()
const {
return in_.empty(); }
1242 {
return value_type(map_.hash_((*in_).first), *in_); }
1253 const value_type& operator () (std::pair<internal_size_type, value_type>& v)
1254 {
return v.second; }
1262 struct Cmp :
public std::binary_function<
1263 std::pair<internal_size_type, value_type>,
1264 std::pair<internal_size_type, value_type>, bool
1270 bool operator () (
const std::pair<internal_size_type, value_type>& a,
1271 const std::pair<internal_size_type, value_type>& b)
const
1273 return (a.first < b.first) ||
1274 ((a.first == b.first) && map_.cmp_(a.second.first, b.second.first));
1278 return std::pair<internal_size_type, value_type>(
1285 return std::pair<internal_size_type, value_type>(
1297 template <
class InputIterator>
1303 typedef HashingStream<old_values_stream, HashedValueExtractor> old_hashing_stream;
1309 typedef AddHashStream<input_stream> new_values_stream;
1313 typedef UniqueValueStream<new_sorted_values_stream> new_unique_values_stream;
1315 typedef HashingStream<new_unique_values_stream, StripHashFunctor> new_hashing_stream;
1319 int_type write_buffer_size = config::get_instance()->disks_number() * 2;
1324 if (n_buckets_new > max_bucket_count())
1325 n_buckets_new = max_bucket_count();
1331 std::swap(buckets_, old_buckets);
1333 bid_container_type old_bids;
1334 std::swap(bids_, old_bids);
1337 reader_type* reader =
new reader_type(old_bids.begin(), old_bids.end(),
1339 old_values_stream old_values(old_buckets.begin(), old_buckets.end(),
1340 *reader, old_bids.begin(), *
this);
1344 new_values_stream new_values(input, *
this);
1345 new_sorted_values_stream new_sorted_values(new_values, Cmp(*
this), mem);
1346 new_unique_values_stream new_unique_values(new_sorted_values, *
this);
1348 writer_type writer(&bids_, write_buffer_size, write_buffer_size / 2);
1353 buckets_[i_bucket] = bucket_type();
1354 buckets_[i_bucket].i_block_ = writer.i_block();
1355 buckets_[i_bucket].i_subblock_ = writer.i_subblock();
1357 old_hashing_stream old_hasher(old_values, i_bucket, HashedValueExtractor(),
this);
1358 new_hashing_stream new_hasher(new_unique_values, i_bucket, StripHashFunctor(),
this);
1362 while (!old_hasher.empty() && !new_hasher.empty())
1366 key_type old_key = (*old_hasher).value_.first;
1367 key_type new_key = (*new_hasher).second.first;
1370 if ((old_hash < new_hash) || (old_hash == new_hash && cmp_(old_key, new_key)))
1372 const hashed_value_type& hvalue = *old_hasher;
1374 writer.append(hvalue.
value_);
1380 if (_eq(old_key, new_key))
1382 const hashed_value_type& hvalue = *old_hasher;
1386 writer.append((*new_hasher).second);
1392 while (!old_hasher.empty())
1394 const hashed_value_type& hvalue = *old_hasher;
1396 writer.append(hvalue.
value_);
1401 while (!new_hasher.empty())
1403 writer.append((*new_hasher).second);
1408 writer.finish_subblock();
1409 buckets_[i_bucket].n_external_ = bucket_size;
1410 num_total_ += bucket_size;
1414 block_cache_.
clear();
1422 i_bucket < old_buckets.size(); i_bucket++)
1424 _erase_nodes(old_buckets[i_bucket].list_, NULL);
1425 old_buckets[i_bucket] = bucket_type();
1441 return (hash_a < hash_b) ||
1442 ((hash_a == hash_b) && cmp_(a, b));
1455 {
return !cmp_(a, b) && !cmp_(b, a); }
1467 reader_type reader(bids_.begin(), bids_.end(), &block_cache_);
1470 std::cout <<
"block " << i_block <<
":\n";
1472 for (
internal_size_type i_subblock = 0; i_subblock < subblocks_per_block; i_subblock++) {
1473 std::cout <<
" subblock " << i_subblock <<
":\n ";
1475 for (
external_size_type i_element = 0; i_element < subblocks_per_block; i_element++) {
1476 std::cout << reader.const_value().first <<
", ";
1479 std::cout << std::endl;
1486 reader_type reader(bids_.begin(), bids_.end(), &block_cache_);
1488 std::cout <<
"number of buckets: " << buckets_.size() << std::endl;
1490 const bucket_type&
bucket = buckets_[i_bucket];
1493 std::cout <<
" bucket " << i_bucket <<
": block=" << bucket.
i_block_ <<
", subblock=" << bucket.
i_subblock_ <<
", external=" << bucket.
n_external_ << std::endl;
1496 std::cout <<
" internal_list=";
1498 std::cout << node->
value_.first <<
" (del=" << node->
deleted() <<
"), ";
1499 node = node->
next();
1501 std::cout << std::endl;
1503 std::cout <<
" external=";
1505 std::cout << reader.const_value().first <<
", ";
1508 std::cout << std::endl;
1514 std::cout <<
"number of buckets: " << buckets_.size() << std::endl;
1516 const bucket_type&
bucket = buckets_[i_bucket];
1517 std::cout <<
" bucket " << i_bucket <<
": block=" << bucket.
i_block_ <<
", subblock=" << bucket.
i_subblock_ <<
", external=" << bucket.
n_external_ <<
", list=" << bucket.
list_ << std::endl;
1524 struct equal_to :
public std::binary_function<key_type, key_type, bool>
1535 return m_map._eq(x, y);
1552 return equal_to(*
this);
1566 const bucket_type& b = buckets_[i_bucket];
1574 double avg_external = (double)sum_external / (
double)buckets_.size();
1575 double std_external = sqrt(((
double)square_sum_external / (
double)buckets_.size()) - (avg_external * avg_external));
1577 o <<
"Bucket count : " << buckets_.size() << std::endl;
1578 o <<
"Values total : " << num_total_ << std::endl;
1579 o <<
"Values buffered : " << buffer_size_ << std::endl;
1580 o <<
"Max Buffer-Size : " << max_buffer_size_ << std::endl;
1581 o <<
"Max external/bucket : " << max_external << std::endl;
1582 o <<
"Avg external/bucket : " << avg_external << std::endl;
1583 o <<
"Std external/bucket : " << std_external << std::endl;
1584 o <<
"Load-factor : " << load_factor() << std::endl;
1585 o <<
"Blocks allocated : " << bids_.size() <<
" => " << (bids_.size() * block_type::raw_size) <<
" bytes" << std::endl;
1586 o <<
"Bytes per value : " << ((double)(bids_.size() * block_type::raw_size) / (
double)num_total_) << std::endl;
1596 template <
class KeyType,
class MappedType,
class HashType,
class KeyCompareType,
1597 unsigned SubBlockSize,
unsigned SubBlocksPerBlock,
class AllocType>
1599 SubBlockSize, SubBlocksPerBlock, AllocType>& a,
1601 SubBlockSize, SubBlocksPerBlock, AllocType>& b)
1609 #endif // !STXXL_CONTAINERS_HASH_MAP_HASH_MAP_HEADER
UniqueValueStream(InputStream &input, self_type &map)
iterator2stream< InputIterator > streamify(InputIterator begin, InputIterator end)
Input iterator range to stream converter.
void print_statistics(std::ostream &o=std::cout) const
Print short general statistics to output stream.
external_size_type n_not_found
Additional information about a stored value:
internal_size_type i_bucket_
index of current bucket
KeyType key_type
type of the keys being used
AddHashStream(InputStream &input, self_type &map)
void _put_node(node_type *node)
Free given node.
void rehash(internal_size_type n=0)
Rehash with (at least) n buckets.
void erase(const_iterator it)
Erase value by iterator.
second_type second
Second tuple component.
void fix_iterators_all2end()
Update all iterators and make them point to the end of the hash-map (used by clear()) ...
void reset_statistics()
Reset all counters to zero.
iterator_map< self_type > iterator_map_type
for tracking active iterators
void insert(InputIterator f, InputIterator l, internal_size_type mem)
Bulk-insert of values in the range [f, l)
internal_size_type buffer_size_
size of internal-memory buffer in number of entries
void print_statistics(std::ostream &o=std::cout) const
external_size_type bucket_size_
key_type first_argument_type
C++11 required type.
internal_size_type i_bucket_
std::vector< bid_type > bid_container_type
container for block-bids
A model of stream that retrieves the data from an input iterator. For convenience use streamify funct...
internal_size_type buffer_size() const
Number of bytes occupied by buffer.
float opt_load_factor() const
Get desired load-factor.
iterator begin()
Returns an iterator pointing to the beginning of the hash-map.
Will return from its input-stream all values that are to be stored in the given bucket.
Iterator _begin() const
iterator pointing to the beginnning of the hash-map
void print_load_statistics(std::ostream &o=std::cout) const
Even more statistics: Number of buckets, number of values, buffer-size, values per bucket...
bool deleted()
check if the next node is deleted.
node< value_type > node_type
nodes for internal-memory buffer
key_type second_argument_type
C++11 required type.
equal_to(const self_type &map)
constructor requires reference to hash_map
hash_map_const_iterator< self_type > const_iterator
external_size_type max_size() const
The hash-map may store up to this number of values.
node_type * _find_key_internal(const bucket_type &bucket, const key_type &key) const
Locate the given key in the internal-memory chained list.
static uint_pair min()
return an uint_pair instance containing the smallest value possible
subblock_type * get_subblock(const bid_type &bid, unsigned_type i_subblock)
Retrieve a subblock from the cache. If not yet cached, only the subblock will be loaded.
const self_type & m_map
reference to hash_map
void max_buffer_size(internal_size_type buffer_size)
Set maximum buffer size.
key_equal key_eq() const
Constructed equality predicate used by this hash-map.
hash_map(internal_size_type n=0, const hasher &hf=hasher(), const key_compare &cmp=key_compare(), internal_size_type buffer_size=128 *1024 *1024, const allocator_type &a=allocator_type())
Construct a new hash-map.
first_type first
First tuple component.
void fix_iterators_2ext(internal_size_type i_bucket_old, const key_type &key, internal_size_type i_bucket_new, external_size_type i_ext)
Update iterators with given key and bucket and make them point to the specified location in external ...
bool _gt(const key_type &a, const key_type &b) const
true iff a > b
hasher hash_function() const
Hash-function used by this hash-map.
uint_pair & operator++()
prefix increment operator (directly manipulates the integer parts)
node_allocator_type node_allocator_
used to allocate new nodes for internal buffer
block_type::bid_type bid_type
block-identifier for blocks
tuple< external_size_type, value_type > _find_key_external(const bucket_type &bucket, const key_type &key) const
Search for key in external part of bucket.
external_size_type num_total_
(estimated) number of values
stxxl::internal_size_type internal_size_type
iterator end()
Returns an iterator pointing to the end of the hash-map.
void opt_load_factor(float z)
Set desired load-factor.
HashedValue< self_type > hashed_value_type
void fix_iterators_2int(internal_size_type i_bucket, const key_type &key, node_type *node)
Update iterators with given key and bucket and make them point to the specified node in internal memo...
buffered_reader< block_cache_type, bid_iterator_type > reader_type
Comparator object for values as required by stxxl::sort.
block_cache_type block_cache_
internal_size_type i_subblock_
index of first subblock
bool result_type
C++11 required type.
void _dump_bucket_statistics()
const_iterator begin() const
Returns a const_interator pointing to the beginning of the hash-map.
void delete_blocks(const BIDIteratorClass &bidbegin, const BIDIteratorClass &bidend)
Deallocates blocks.
Tuning parameters for external memory hash map.
std::pair< internal_size_type, typename InputStream::value_type > value_type
(hash,value)
Extracts the value-part (ignoring the hashvalue); required by HashingStream (see above) ...
block_cache< block_type > block_cache_type
bool _leq(const key_type &a, const key_type &b) const
true iff a <= b
Block containing elements of fixed length.
key_compare key_cmp() const
Strict-weak-ordering used by this hash-map.
Stream for filtering duplicates.
internal_size_type _bkt_num(const key_type &key, internal_size_type n) const
Bucket-index for values with given key.
const_iterator end() const
Returns a const_iterator pointing to the end of the hash-map.
bool oblivious_
false if the total-number of values is correct (false) or true if estimated (true); see *oblivious_-m...
external_size_type erase(const key_type &key)
Erase value by key; check external memory.
void swap(self_type &obj)
Exchange stored values with another hash-map.
equal_to key_equal
Type of constructed equality predicate.
void _make_conscious()
After using *oblivious_-methods only an estimate for the total number of elements can be given...
HashType hasher
type of (mother) hash-function
void _rebuild_buckets(internal_size_type n_desired=0)
choose_int_types< my_pointer_size >::int_type int_type
AllocatorType allocator_type
allocator template type
internal_size_type max_buffer_size() const
Maximum buffer size in byte.
bid_container_type bids_
blocks-ids of allocated blocks
typed_block< subblock_raw_size, value_type > subblock_type
a subblock consists of subblock_size values
bucket< node_type > bucket_type
buckets
static instance_pointer get_instance()
unsigned_type internal_size_type
void _erase_nodes(node_type *first, node_type *last)
Free nodes in range [first, last). If last is NULL all nodes will be freed.
std::pair< iterator, iterator > equal_range(const key_type &key)
Finds a range containing all values with given key. Non-const access.
const value_type & const_reference
type for constant value-references
std::pair< const_iterator, const_iterator > equal_range(const key_type &key) const
Finds a range containing all values with given key. Const access.
external_size_type n_found_external
internal_size_type max_buffer_size_
maximum size for internal-memory buffer
std::vector< bucket_type > buckets_container_type
internal_size_type i_block_
index of first block's bid (to be used as index for hash_map's bids_-array
bool _geq(const key_type &a, const key_type &b) const
true iff a >= b
std::pair< internal_size_type, value_type > max_value() const
#define STXXL_BEGIN_NAMESPACE
internal_size_type _bkt_num(const key_type &key) const
Bucket-index for values with given key.
external_size_type n_external_
number of elements in external memory
InputStream::value_type value_type
key_compare cmp_
user supplied strict-weak-ordering for keys
value_type & reference
type for value-references
void clear()
Empty cache; don't write back dirty blocks.
std::pair< KeyType, MappedType > value_type
actually store (key-data)-pairs
hash_map_iterator< self_type > iterator
Produces sorted stream from input stream.
float load_factor() const
Average number of (sub)blocks occupied by a bucket.
internal_size_type bucket_index(const key_type &k) const
Bucket-index for values with given key.
node_type * _get_node()
Allocate a new buffer-node.
static uint_pair max()
return an uint_pair instance containing the largest value possible
internal_size_type i_bucket_
Stream interface for all value-pairs currently stored in the map.
bool set_deleted(bool d)
change deleted flag on the next node
Construct an equality predicate from the comparison operator.
NodeType * list_
entry point to the chain in internal memory
Iterator _end() const
iterator pointing to the end of the hash-map (iterator-type as template-parameter) ...
#define STXXL_VERBOSE_HASH_MAP(m)
hash_map(InputIterator begin, InputIterator end, internal_size_type mem_to_sort=256 *1024 *1024, internal_size_type n=0, const hasher &hf=hasher(), const key_compare &cmp=key_compare(), internal_size_type buffer_size=128 *1024 *1024, const allocator_type &a=allocator_type())
Construct a new hash-map and insert all values in the range [f,l)
KeyCompareType key_compare
functor that imposes a ordering on keys (but see _lt())
float opt_load_factor_
desired load factor after rehashing
allocator_type::template rebind< node_type >::other node_allocator_type
node_type * _new_node(const value_type &value, node_type *nxt, bool del)
Allocate a new buffer-node and initialize with given value, node and deleted-flag.
external_size_type size() const
Number of values currently stored. Note: If the correct number is currently unknown (because *_oblivo...
internal_size_type bucket_count() const
Number of buckets.
void flush()
Write all dirty blocks back to disk.
buckets_container_type buckets_
array of bucket
bool _lt(const key_type &a, const key_type &b) const
iterator find(const key_type &key)
Look up value by key. Non-const access.
uint64 external_size_type
Used to scan external memory with prefetching.
Main implementation of external memory hash map.
subblock_type * _load_subblock(const bucket_type &bucket, internal_size_type which_subblock) const
Load the given bucket's i-th subblock.
allocator_type get_allocator() const
Get node memory allocator.
source_type source_
source of current value: external or internal
typed_block< block_raw_size, subblock_type > block_type
a block consists of block_size subblocks
stxxl::external_size_type external_size_type
void reset_statistics()
Reset hash-map statistics.
bid_container_type::iterator bid_iterator_type
iterator for block-bids
InputStream::value_type value_type
HashingStream(InputStream &input, internal_size_type i_bucket, ValueExtractor vextract, self_type *map)
std::pair< internal_size_type, value_type > min_value() const
External associative container (map). Introduction to map container: see STXXL Map (B+-tree) tutor...
void clear()
Reset hash-map: erase all values, invalidate all iterators.
bool _eq(const key_type &a, const key_type &b) const
true iff a == b. note: it is mandatory that equal keys yield equal hash-values => hashing not neccess...
node_type * node_
current (source=internal) or old (src=external) internal node
subblock_type::bid_type subblock_bid_type
block-identifier for subblocks
value_type const * const_pointer
const pointer to type of keys
external_size_type count(const key_type &k) const
Number of values with given key.
MappedType mapped_type
type of the data to be stored
external_size_type n_found_internal
void print_statistics(std::ostream &o=std::cout) const
Print statistics: Number of hits/misses, blocks forced from cache or written back.
node< ValueType > * set_next(node< ValueType > *n)
change the "next" value of next node pointer
const_iterator find(const key_type &key) const
Look up value by key. Const access.
internal_size_type max_bucket_count() const
Maximum number of buckets.
Buffered writing of values. New Blocks are allocated as needed.
value_type * pointer
pointer to type of keys
external_size_type n_subblocks_loaded
ExtIterator find(ExtIterator begin, ExtIterator end, const EqualityComparable &value, int_type nbuffers=0)
External equivalent of std::find, see stxxl::find.
std::pair< iterator, bool > insert(const value_type &value)
Insert a new value if no value with the same key is already present; external memory must therefore b...
key_type key_
key of current value
iterator_map_type iterator_map_
keeps track of all active iterators
iterator insert_oblivious(const value_type &value)
Insert a value; external memory is not accessed so that another value with the same key may be overwr...
hash_map< KeyType, MappedType, HashType, KeyCompareType, SubBlockSize, SubBlocksPerBlock > self_type
hasher hash_
user supplied mother hash-function
#define STXXL_END_NAMESPACE
stxxl::int64 difference_type
node< ValueType > * next()
return the next node, without the "next" flag.
void fix_iterators_2end(internal_size_type i_bucket, const key_type &key)
Update iterators with given key and bucket and make them point to the end of the hash-map (called by ...
void erase_oblivious(const key_type &key)
Erase value by key but without looking at external memory.
bool empty() const
Check if container is empty.