STXXL 1.3.2
containers/berkeley_db_benchmark.cpp

This is the benchmark mentioned in the paper R. Dementiev, L. Kettner, P. Sanders, "STXXL: standard template library for XXL data sets", Software: Practice and Experience, Volume 38, Issue 6, pages 589-637, May 2008, DOI: 10.1002/spe.844.
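The program benchmarks four operation mixes — random insertions, locates (lower-bound/successor searches), range queries, and deletions — on three external-memory indexes: stxxl::map, a Berkeley DB B-tree, and a TPIE AMI_btree. Versions 1 and 2 run the operations on an initially empty index; the "big" versions 3-5 first bulk-load a sorted sequence of random records and then run a fixed number of operations (100000 per phase) on the populated index. A typical invocation, assuming the binary is named after the source file, would be: berkeley_db_benchmark 3 10000000, i.e. bulk-load 10,000,000 records into stxxl::map and then benchmark the four phases.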

/***************************************************************************
* containers/berkeley_db_benchmark.cpp
*
* Part of the STXXL. See http://stxxl.sourceforge.net
*
* Copyright (C) 2006 Roman Dementiev <[email protected]>
* Copyright (C) 2009, 2010 Andreas Beckmann <[email protected]>
*
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
**************************************************************************/
#include <stxxl/vector>
#include <stxxl/map>
#include <stxxl/timer>
#include <stxxl/stream>
#include <db_cxx.h>
#include "app_config.h"
#include <portability.h>
#include <ami_btree.h>
#define BDB_BULK_SCAN
#define KEY_SIZE 8
#define DATA_SIZE 32
#define NODE_BLOCK_SIZE (32 * 1024)
#define LEAF_BLOCK_SIZE (32 * 1024)
#define TOTAL_CACHE_SIZE (750 * 1024 * 1024)
//#define TOTAL_CACHE_SIZE (150 * 1024 * 1024)
//#define NODE_CACHE_SIZE (1 * (TOTAL_CACHE_SIZE / 40))
//#define LEAF_CACHE_SIZE (39 * (TOTAL_CACHE_SIZE / 40))
#define NODE_CACHE_SIZE (64 * 1024 * 1024)
#define LEAF_CACHE_SIZE (TOTAL_CACHE_SIZE - NODE_CACHE_SIZE)
#define SORTER_MEM (TOTAL_CACHE_SIZE - 1024 * 1024 * 2 * 4)
#define SCAN_LIMIT(x) (x)
//#define BDB_FILE "/data3/bdb_file"
#define BDB_FILE "/var/tmp/bdb_file"
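// Memory budget: for stxxl::map and the TPIE btree the TOTAL_CACHE_SIZE bytes
// are split into NODE_CACHE_SIZE (64 MiB) for inner nodes and LEAF_CACHE_SIZE
// for leaves, while Berkeley DB gets a single buffer pool of 'cachesize' bytes
// (defined below), reduced relative to TOTAL_CACHE_SIZE presumably to leave
// room for BDB's other allocations. SORTER_MEM is the memory handed to
// stxxl::stream::sort during bulk construction, and BDB_FILE is the scratch
// database file that is unlink()ed before and after each BDB run.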
// BDB settings
u_int32_t pagesize = LEAF_BLOCK_SIZE;
u_int bulkbufsize = 4 * 1024 * 1024;
u_int logbufsize = 8 * 1024 * 1024;
u_int cachesize = (TOTAL_CACHE_SIZE < 500 * 1024 * 1024) ? (4 * (TOTAL_CACHE_SIZE / 5)) : (TOTAL_CACHE_SIZE - 100 * 1024 * 1024);
u_int datasize = DATA_SIZE;
u_int keysize = KEY_SIZE;
u_int numitems = 0;
const char * letters = "abcdefghijklmnopqrstuvwxyz";
struct my_key
{
char keybuf[KEY_SIZE];
};
std::ostream & operator << (std::ostream & o, const my_key & obj)
{
for (int i = 0; i < KEY_SIZE; ++i)
o << obj.keybuf[i];
return o;
}
bool operator == (const my_key & a, const my_key & b)
{
return strncmp(a.keybuf, b.keybuf, KEY_SIZE) == 0;
}
bool operator != (const my_key & a, const my_key & b)
{
return strncmp(a.keybuf, b.keybuf, KEY_SIZE) != 0;
}
bool operator < (const my_key & a, const my_key & b)
{
return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
}
bool operator > (const my_key & a, const my_key & b)
{
return strncmp(a.keybuf, b.keybuf, KEY_SIZE) > 0;
}
bool operator <= (const my_key & a, const my_key & b)
{
return strncmp(a.keybuf, b.keybuf, KEY_SIZE) <= 0;
}
bool operator >= (const my_key & a, const my_key & b)
{
return strncmp(a.keybuf, b.keybuf, KEY_SIZE) >= 0;
}
struct my_data
{
char databuf[DATA_SIZE];
};
std::ostream & operator << (std::ostream & o, const my_data & obj)
{
o << "DATA(size=" << sizeof(obj) << ") ";
return o;
}
my_key min_key, max_key;
struct comp_type : std::binary_function<my_key, my_key, bool>
{
bool operator () (const my_key & a, const my_key & b) const
{
return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
}
static my_key max_value()
{
return max_key;
}
static my_key min_value()
{
return min_key;
}
};
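// comp_type is the strict weak ordering used by stxxl::map and by
// stxxl::stream::sort in the bulk-loading benchmarks; the min_value()/max_value()
// sentinels it provides (keys that compare below/above every generated key)
// are filled in by init() below with all-0x00 and all-0xFF bytes.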
// Key type.
typedef my_key bkey_t;
// Element type for the btree.
struct el_t {
bkey_t key_;
my_data data_;
el_t(bkey_t k) : key_(k) { }
el_t() { }
};
struct key_from_el {
bkey_t operator () (const el_t & v) const
{
return v.key_;
}
};
// Temporary distinction between UN*X and Windows, since there are some
// problems with the MMAP collection implementation.
#ifdef _WIN32
typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el, BTE_COLLECTION_UFS> u_btree_t;
#else
typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el> u_btree_t;
#endif
void init()
{
memset(max_key.keybuf, (std::numeric_limits<unsigned char>::max)(), KEY_SIZE);
memset(min_key.keybuf, (std::numeric_limits<unsigned char>::min)(), KEY_SIZE);
}
typedef stxxl::map<my_key, my_data, comp_type, NODE_BLOCK_SIZE, LEAF_BLOCK_SIZE> map_type;
#define REAL_NODE_BLOCK_SIZE map_type::node_block_type::raw_size
#define REAL_LEAF_BLOCK_SIZE map_type::leaf_block_type::raw_size
#define REAL_NODE_MELEMENTS map_type::node_block_type::size
#define REAL_LEAF_MELEMENTS map_type::leaf_block_type::size
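// The REAL_* macros report the raw block sizes and fan-outs that stxxl::map
// actually derives from the requested NODE_BLOCK_SIZE/LEAF_BLOCK_SIZE;
// main() prints them at startup.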
typedef stxxl::VECTOR_GENERATOR<std::pair<my_key, my_data>, 1, 1>::result vector_type;
//typedef stxxl::vector<std::pair<my_key,my_data>,1,stxxl::lru_pager<1>,512*1024> vector_type;
//#define KEYPOS (i % KEY_SIZE)
//#define VALUE (myrand() % 26)
#if 0
unsigned ran32State = 0xdeadbeef;
inline unsigned myrand()
{
return (ran32State = 1664525 * ran32State + 1013904223);
}
inline void rand_key(stxxl::int64 pos, my_key & Key)
{
for (int i = 0; i < KEY_SIZE; ++i)
Key.keybuf[i] = letters[myrand() % 26];
}
#else // a longer pseudo random sequence
long long unsigned ran32State = 0xdeadbeef;
inline long long unsigned myrand()
{
return (ran32State = (ran32State * 0x5DEECE66DULL + 0xBULL) & 0xFFFFFFFFFFFFULL);
}
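// The constants above are the multiplier, increment and 48-bit mask of the
// POSIX drand48 / java.util.Random linear congruential generator; rand_key()
// consumes 5 bits of the state per character, so an 8-character key uses 40
// of the 48 state bits.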
inline void rand_key(stxxl::int64 /*pos*/, my_key & Key)
{
long long unsigned r = myrand();
for (int i = 0; i < KEY_SIZE; ++i)
{
Key.keybuf[i] = letters[r % 26];
r >>= 5;
}
}
#endif
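// Benchmark version 2: a Berkeley DB B-tree stored in BDB_FILE. Four timed
// phases of 'ops' operations each are run on an initially empty database:
// random insertions (put with DB_NOOVERWRITE), locates via a cursor with
// DB_SET_RANGE, range queries that scan forward with DB_NEXT up to a second
// random key, and deletions. Throughput and buffer-pool statistics are
// printed after each phase.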
void run_bdb_btree(stxxl::int64 ops)
{
const char * filename = BDB_FILE;
my_key key1_storage;
my_data data1_storage;
unlink(filename);
memset(key1_storage.keybuf, 'a', KEY_SIZE);
memset(data1_storage.databuf, 'b', DATA_SIZE);
Db db(NULL, 0); // Instantiate the Db object
try {
db.set_errfile(stderr);
db.set_pagesize(pagesize);
db.set_cachesize(0, cachesize, 1);
// Open the database
db.open(NULL, // Transaction pointer
filename, // Database file name
NULL, // Optional logical database name
DB_BTREE, // Database access method
DB_CREATE, // Open flags
0); // File mode (using defaults)
// here we start with the tests
Dbt key1(key1_storage.keybuf, KEY_SIZE);
Dbt data1(data1_storage.databuf, DATA_SIZE);
stxxl::timer Timer;
stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
stxxl::int64 i;
//comp_type cmp_;
ran32State = 0xdeadbeef;
DB_BTREE_STAT * dbstat;
db.stat(NULL, &dbstat, 0);
STXXL_MSG("Records in map: " << dbstat->bt_ndata);
db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
Timer.start();
for (i = 0; i < n_inserts; ++i)
{
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
}
Timer.stop();
db.stat(NULL, &dbstat, 0);
STXXL_MSG("Records in map: " << dbstat->bt_ndata);
STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
db.get_env()->memp_stat_print(DB_STAT_CLEAR);
Timer.reset();
Timer.start();
Dbc * cursorp;
db.cursor(NULL, &cursorp, 0);
for (i = 0; i < n_locates; ++i)
{
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
Dbt keyx(key1_storage.keybuf, KEY_SIZE);
Dbt datax(data1_storage.databuf, DATA_SIZE);
cursorp->get(&keyx, &datax, DB_SET_RANGE);
}
Timer.stop();
STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
db.get_env()->memp_stat_print(DB_STAT_CLEAR);
Timer.reset();
Timer.start();
stxxl::int64 n_scanned = 0;
for (i = 0; i < n_range_queries; ++i)
{
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
my_key last_key = key1_storage;
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
if (last_key < key1_storage)
std::swap(last_key, key1_storage);
Dbt keyx(key1_storage.keybuf, KEY_SIZE);
Dbt datax(data1_storage.databuf, DATA_SIZE);
if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
continue;
while (*((my_key *)keyx.get_data()) <= last_key)
{
++n_scanned;
if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
break;
}
if (n_scanned >= 10 * n_range_queries)
{
++i;
break;
}
}
n_range_queries = i;
Timer.stop();
if (cursorp != NULL)
cursorp->close();
STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
" key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
db.get_env()->memp_stat_print(DB_STAT_CLEAR);
ran32State = 0xdeadbeef;
memset(key1_storage.keybuf, 'a', KEY_SIZE);
Timer.reset();
Timer.start();
for (i = 0; i < n_deletes; ++i)
{
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
Dbt keyx(key1_storage.keybuf, KEY_SIZE);
db.del(NULL, &keyx, 0);
}
Timer.stop();
db.stat(NULL, &dbstat, 0);
STXXL_MSG("Records in map: " << dbstat->bt_ndata);
STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
db.get_env()->memp_stat_print(DB_STAT_CLEAR);
db.close(0);
}
catch (DbException & e)
{
STXXL_ERRMSG("DbException happened: " << e.what());
}
catch (std::exception & e)
{
STXXL_ERRMSG("std::exception happened: " << e.what());
}
unlink(filename);
}
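// Benchmark version 1: the same four phases on an stxxl::map with
// NODE_CACHE_SIZE/LEAF_CACHE_SIZE node and leaf caches. Locates use
// lower_bound(), range queries iterate between two lower_bound() positions,
// and stxxl I/O statistics are printed after each phase.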
void run_stxxl_map(stxxl::int64 ops)
{
map_type Map(NODE_CACHE_SIZE, LEAF_CACHE_SIZE);
Map.enable_prefetching();
stxxl::stats * Stats = stxxl::stats::get_instance();
std::pair<my_key, my_data> element;
memset(element.first.keybuf, 'a', KEY_SIZE);
memset(element.second.databuf, 'b', DATA_SIZE);
stxxl::timer Timer;
stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
stxxl::int64 i;
//comp_type cmp_;
ran32State = 0xdeadbeef;
//stxxl::random_number32 myrand;
STXXL_MSG("Records in map: " << Map.size());
stxxl::stats_data stats_begin(*Stats);
Timer.start();
for (i = 0; i < n_inserts; ++i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
Map.insert(element);
}
Timer.stop();
STXXL_MSG("Records in map: " << Map.size());
STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
std::cout << (stxxl::stats_data(*Stats) - stats_begin);
const map_type & CMap(Map); // const map reference
stats_begin = stxxl::stats_data(*Stats);
Timer.reset();
Timer.start();
for (i = 0; i < n_locates; ++i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
CMap.lower_bound(element.first);
}
Timer.stop();
STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_locates) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
std::cout << (stxxl::stats_data(*Stats) - stats_begin);
stats_begin = stxxl::stats_data(*Stats);
Timer.reset();
Timer.start();
stxxl::int64 n_scanned = 0; //, skipped_queries = 0;
for (i = 0; i < n_range_queries; ++i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
my_key begin_key = element.first;
map_type::const_iterator begin = CMap.lower_bound(element.first);
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
map_type::const_iterator beyond = CMap.lower_bound(element.first);
if (element.first < begin_key)
std::swap(begin, beyond);
while (begin != beyond)
{
my_data tmp = begin->second;
stxxl::STXXL_UNUSED(tmp);
++n_scanned;
++begin;
}
if (n_scanned >= 10 * n_range_queries)
{
++i;
break;
}
}
n_range_queries = i;
Timer.stop();
STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
" key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
std::cout << (stxxl::stats_data(*Stats) - stats_begin);
ran32State = 0xdeadbeef;
memset(element.first.keybuf, 'a', KEY_SIZE);
memset(element.second.databuf, 'b', DATA_SIZE);
stats_begin = stxxl::stats_data(*Stats);
Timer.reset();
Timer.start();
for (i = 0; i < n_deletes; ++i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
Map.erase(element.first);
}
Timer.stop();
STXXL_MSG("Records in map: " << Map.size());
STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_deletes) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
std::cout << (stxxl::stats_data(*Stats) - stats_begin);
}
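// Helper input streams for the bulk-loading ("big") benchmarks: rand_key_gen
// produces 'el' random keys in stxxl stream form, and key2pair turns a
// (sorted) key stream into a stream of (key, data) pairs that can be
// materialized into a vector or written to a TPIE stream.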
class rand_key_gen
{
stxxl::int64 counter;
my_key & current;
stxxl::random_number32 myrand;
rand_key_gen();
public:
typedef my_key value_type;
rand_key_gen(stxxl::int64 el, my_key & cur) :
counter(el), current(cur)
{
//const stxxl::int64 & i = counter;
//current.keybuf[KEYPOS] = letters[VALUE];
rand_key(counter, current);
}
const my_key & operator * () { return current; }
const my_key * operator -> () { return &current; }
rand_key_gen & operator ++ ()
{
--counter;
//const stxxl::int64 & i = counter;
//current.keybuf[KEYPOS] = letters[VALUE];
rand_key(counter, current);
return *this;
}
bool empty() const { return counter == 0; }
};
template <class InputType>
class key2pair
{
InputType & in;
std::pair<my_key, my_data> current;
key2pair();
public:
typedef std::pair<my_key, my_data> value_type;
key2pair(InputType & in_) : in(in_)
{
if (!in.empty())
current.first = *in;
}
const value_type & operator * () { return current; }
const value_type * operator -> () { return &current; }
key2pair & operator ++ ()
{
++in;
if (!empty())
current.first = *in;
return *this;
}
bool empty() const { return in.empty(); }
};
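// Benchmark version 3: generate n random keys, sort them with
// stxxl::stream::sort using SORTER_MEM bytes, materialize the sorted
// (key, data) pairs into an external vector, bulk-construct the stxxl::map
// from that sorted range, and then run 'ops' (100000 when called from main)
// insertions, locates, range queries and deletions on the populated map.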
void run_stxxl_map_big(stxxl::int64 n, unsigned ops)
{
stxxl::stats * Stats = stxxl::stats::get_instance();
std::pair<my_key, my_data> element;
memset(element.first.keybuf, 'a', KEY_SIZE);
memset(element.second.databuf, 'b', DATA_SIZE);
stxxl::timer Timer;
stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
stxxl::int64 i;
//comp_type cmp_;
ran32State = 0xdeadbeef;
//stxxl::random_number32 myrand;
Timer.start();
vector_type SortedSeq(n);
const vector_type & CSortedSeq(SortedSeq);
{
rand_key_gen Gen(n, element.first);
typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
typedef key2pair<sorter_type> key2pair_type;
key2pair_type Key2Pair(Sorter);
stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
}
Timer.stop();
STXXL_MSG("Finished sorting input. Elapsed time: " <<
(Timer.mseconds() / 1000.) << " seconds.");
stxxl::stats_data stats_begin(*Stats);
Timer.reset();
Timer.start();
// bulk construction
map_type Map(CSortedSeq.begin(),
CSortedSeq.end(),
NODE_CACHE_SIZE, LEAF_CACHE_SIZE, true);
Timer.stop();
STXXL_MSG("Records in map: " << Map.size());
STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
Map.print_statistics(std::cout);
Map.reset_statistics();
std::cout << (stxxl::stats_data(*Stats) - stats_begin);
Map.disable_prefetching();
stats_begin = stxxl::stats_data(*Stats);
Timer.reset();
Timer.start();
for (i = 0; i < n_inserts; ++i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
Map.insert(element);
}
Timer.stop();
STXXL_MSG("Records in map: " << Map.size());
STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
Map.print_statistics(std::cout);
Map.reset_statistics();
std::cout << (stxxl::stats_data(*Stats) - stats_begin);
const map_type & CMap(Map); // const map reference
Timer.reset();
Timer.start();
for (i = 0; i < n_locates; ++i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
CMap.lower_bound(element.first);
}
Timer.stop();
STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
Map.print_statistics(std::cout);
Map.reset_statistics();
std::cout << (stxxl::stats_data(*Stats) - stats_begin);
Map.enable_prefetching();
stats_begin = stxxl::stats_data(*Stats);
Timer.reset();
Timer.start();
stxxl::int64 n_scanned = 0; //, skipped_queries = 0;
for (i = 0; i < n_range_queries; ++i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
my_key begin_key = element.first;
map_type::const_iterator begin = CMap.lower_bound(element.first);
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
map_type::const_iterator beyond = CMap.lower_bound(element.first);
if (element.first < begin_key)
std::swap(begin, beyond);
/*
STXXL_MSG("Looking "<<element.first<<" scanned: "<<n_scanned);
if(beyond==CMap.end())
{
STXXL_MSG("Upper bound "<<"END");
}
else
{
STXXL_MSG("Upper bound "<<((element.first>begin_key)?element.first:begin_key));
}*/
while (begin != beyond)
{
++n_scanned;
++begin;
}
if (n_scanned >= SCAN_LIMIT(n))
{
++i;
break;
}
}
n_range_queries = i;
Timer.stop();
STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
" key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
Map.print_statistics(std::cout);
Map.reset_statistics();
std::cout << (stxxl::stats_data(*Stats) - stats_begin);
ran32State = 0xdeadbeef;
memset(element.first.keybuf, 'a', KEY_SIZE);
memset(element.second.databuf, 'b', DATA_SIZE);
Map.disable_prefetching();
stats_begin = stxxl::stats_data(*Stats);
Timer.reset();
Timer.start();
for (i = n_deletes; i > 0; --i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.first);
Map.erase(element.first);
}
Timer.stop();
STXXL_MSG("Records in map: " << Map.size());
STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
Map.print_statistics(std::cout);
Map.reset_statistics();
std::cout << (stxxl::stats_data(*Stats) - stats_begin);
}
typedef AMI_STREAM<el_t> stream_t;
char dummy;
class MyFilter {
public:
bool operator () (const el_t & v) const
{
dummy += v.key_.keybuf[0]; // touch element
return true;
}
};
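// Benchmark version 5: the same bulk-load-then-operate experiment on a TPIE
// AMI_btree. The sorted keys are written to an AMI_STREAM and bulk-loaded
// with load_sorted(); locates use succ(), and range queries use
// range_query() with MyFilter touching each element so that the scan is not
// optimized away.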
void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
{
el_t element;
memset(element.key_.keybuf, 'a', KEY_SIZE);
memset(element.data_.databuf, 'b', DATA_SIZE);
// Log debugging info from the application, but not from the library.
tpie_log_init(TPIE_LOG_APP_DEBUG);
MM_manager.set_memory_limit(TOTAL_CACHE_SIZE);
MM_manager.enforce_memory_limit();
stream_t * is = new stream_t;
stxxl::timer Timer;
stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
stxxl::int64 i;
//comp_type cmp_;
ran32State = 0xdeadbeef;
//stxxl::random_number32 myrand;
Timer.start();
{
rand_key_gen Gen(n, element.key_);
typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
//typedef key2pair<sorter_type> key2pair_type;
//key2pair_type Key2Pair(Sorter);
while (!Sorter.empty())
{
is->write_item(*Sorter);
++Sorter;
}
}
Timer.stop();
STXXL_MSG("Finished sorting input. Elapsed time: " <<
(Timer.mseconds() / 1000.) << " seconds.");
Timer.reset();
Timer.start();
// bulk construction
u_btree_t * u_btree;
AMI_btree_params params;
params.node_block_factor = NODE_BLOCK_SIZE / 4096;
params.leaf_block_factor = LEAF_BLOCK_SIZE / 4096;
params.leaf_cache_size = LEAF_CACHE_SIZE / LEAF_BLOCK_SIZE;
params.node_cache_size = NODE_CACHE_SIZE / NODE_BLOCK_SIZE;
u_btree = new u_btree_t(params);
using std::cout;
using std::cerr;
if (!u_btree->is_valid()) {
cerr << "Error initializing btree. Aborting.\n";
delete u_btree;
exit(1);
}
if (u_btree->load_sorted(is, 1.0, 1.0) != AMI_ERROR_NO_ERROR)
cerr << "Error during bulk loading.\n";
Timer.stop();
STXXL_MSG("Records in map: " << u_btree->size());
STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
Timer.reset();
Timer.start();
for (i = 0; i < n_inserts; ++i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.key_);
u_btree->insert(element);
}
Timer.stop();
STXXL_MSG("Records in map: " << u_btree->size());
STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
Timer.reset();
Timer.start();
el_t result;
for (i = 0; i < n_locates; ++i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.key_);
u_btree->succ(element.key_, result);
}
Timer.stop();
STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
Timer.reset();
Timer.start();
stxxl::int64 n_scanned = 0; //, skipped_queries = 0;
MyFilter filter;
for (i = 0; i < n_range_queries; ++i)
{
rand_key(i, element.key_);
my_key begin_key = element.key_;
rand_key(i, element.key_);
if (element.key_ < begin_key)
n_scanned += u_btree->range_query(element.key_, begin_key, NULL, filter);
else
n_scanned += u_btree->range_query(begin_key, element.key_, NULL, filter);
if (n_scanned >= SCAN_LIMIT(n))
{
++i;
break;
}
}
n_range_queries = i;
Timer.stop();
STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
" key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
ran32State = 0xdeadbeef;
memset(element.key_.keybuf, 'a', KEY_SIZE);
memset(element.data_.databuf, 'b', DATA_SIZE);
Timer.reset();
Timer.start();
for (i = n_deletes; i > 0; --i)
{
//element.first.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, element.key_);
u_btree->erase(element.key_);
}
Timer.stop();
STXXL_MSG("Records in map: " << u_btree->size());
STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
delete u_btree;
delete is;
}
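// Benchmark version 4: Berkeley DB has no bulk-construction interface, so
// the n pre-sorted pairs are inserted in key order, which (as noted below)
// may improve construction performance. When BDB_BULK_SCAN is defined, the
// range-query phase uses BDB bulk retrieval (DB_SET_RANGE/DB_NEXT with
// DB_MULTIPLE_KEY into a DB_DBT_USERMEM buffer of logbufsize bytes) instead
// of fetching one record per cursor call.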
void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
{
const char * filename = BDB_FILE;
my_key key1_storage;
my_data data1_storage;
#ifdef BDB_BULK_SCAN
int * bulk_buffer = new int[logbufsize / sizeof(int)];
#endif
unlink(filename);
memset(key1_storage.keybuf, 'a', KEY_SIZE);
memset(data1_storage.databuf, 'b', DATA_SIZE);
Db db(NULL, 0); // Instantiate the Db object
try {
// here we start with the tests
Dbt key1(key1_storage.keybuf, KEY_SIZE);
Dbt data1(data1_storage.databuf, DATA_SIZE);
stxxl::timer Timer;
stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
stxxl::int64 i;
//comp_type cmp_;
ran32State = 0xdeadbeef;
Timer.start();
vector_type SortedSeq(n);
//const vector_type & CSortedSeq(SortedSeq);
{
rand_key_gen Gen(n, key1_storage);
typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
typedef key2pair<sorter_type> key2pair_type;
key2pair_type Key2Pair(Sorter);
stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
}
Timer.stop();
STXXL_MSG("Finished sorting input. Elapsed time: " << (Timer.mseconds() / 1000.) << " seconds.");
db.set_errfile(stderr);
db.set_pagesize(pagesize);
db.set_cachesize(0, cachesize, 10);
STXXL_MSG("BDB cache size set.");
// Open the database
db.open(NULL, // Transaction pointer
filename, // Database file name
NULL, // Optional logical database name
DB_BTREE, // Database access method
DB_CREATE, // Open flags
0); // File mode (using defaults)
db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
Timer.reset();
Timer.start();
// BDB does not have bulk construction,
// however inserting in sorted order might help
// to improve performance
vector_type::const_iterator cit = SortedSeq.begin();
for (i = 0; i < n; ++i, ++cit)
{
key1_storage = cit->first;
db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
}
Timer.stop();
DB_BTREE_STAT * dbstat;
db.stat(NULL, &dbstat, 0);
STXXL_MSG("Records in map: " << dbstat->bt_ndata);
STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
db.stat_print(0);
db.get_env()->memp_stat_print(DB_STAT_CLEAR);
Timer.reset();
Timer.start();
for (i = 0; i < n_inserts; ++i)
{
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
}
Timer.stop();
db.stat(NULL, &dbstat, 0);
STXXL_MSG("Records in map: " << dbstat->bt_ndata);
STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
db.stat_print(0);
db.get_env()->memp_stat_print(DB_STAT_CLEAR);
Timer.reset();
Timer.start();
Dbc * cursorp;
db.cursor(NULL, &cursorp, 0);
for (i = 0; i < n_locates; ++i)
{
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
Dbt keyx(key1_storage.keybuf, KEY_SIZE);
Dbt datax(data1_storage.databuf, DATA_SIZE);
cursorp->get(&keyx, &datax, DB_SET_RANGE);
}
Timer.stop();
STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
db.stat_print(0);
db.get_env()->memp_stat_print(DB_STAT_CLEAR);
Timer.reset();
Timer.start();
stxxl::int64 n_scanned = 0;
for (i = 0; i < n_range_queries; ++i)
{
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
my_key last_key = key1_storage;
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
if (last_key < key1_storage)
std::swap(last_key, key1_storage);
//STXXL_MSG("Looking "<<key1_storage<<" scanned: "<<n_scanned);
//STXXL_MSG("Upper bound "<<last_key);
Dbt keyx(key1_storage.keybuf, KEY_SIZE);
#ifdef BDB_BULK_SCAN
Dbt datax(bulk_buffer, DATA_SIZE);
datax.set_ulen(logbufsize);
datax.set_flags(DB_DBT_USERMEM);
#else
Dbt datax(data1_storage.databuf, DATA_SIZE);
#endif
#ifdef BDB_BULK_SCAN
if (cursorp->get(&keyx, &datax, DB_SET_RANGE | DB_MULTIPLE_KEY) == DB_NOTFOUND)
continue;
do
{
DbMultipleKeyDataIterator BulkIterator(datax);
Dbt key1, data1;
while (BulkIterator.next(key1, data1) &&
*((my_key *)key1.get_data()) <= last_key)
{
++n_scanned;
//STXXL_MSG("Result "<<*((my_key *)key1.get_data()));
}
if (cursorp->get(&keyx, &datax, DB_NEXT | DB_MULTIPLE_KEY) == DB_NOTFOUND)
break;
if (*((my_key *)keyx.get_data()) > last_key)
{
break;
}
} while (1);
#else
if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
continue;
while (*((my_key *)keyx.get_data()) <= last_key)
{
++n_scanned;
if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
break;
}
#endif
if (n_scanned >= SCAN_LIMIT(n))
{
++i;
break;
}
}
n_range_queries = i;
Timer.stop();
if (cursorp != NULL)
cursorp->close();
STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
" key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
db.stat_print(0);
db.get_env()->memp_stat_print(DB_STAT_CLEAR);
ran32State = 0xdeadbeef;
memset(key1_storage.keybuf, 'a', KEY_SIZE);
Timer.reset();
Timer.start();
for (i = 0; i < n_deletes; ++i)
{
//key1_storage.keybuf[KEYPOS] = letters[VALUE];
rand_key(i, key1_storage);
Dbt keyx(key1_storage.keybuf, KEY_SIZE);
db.del(NULL, &keyx, 0);
}
Timer.stop();
db.stat(NULL, &dbstat, 0);
STXXL_MSG("Records in map: " << dbstat->bt_ndata);
STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
" seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
db.stat_print(0);
db.get_env()->memp_stat_print(DB_STAT_CLEAR);
db.close(0);
}
catch (DbException & e)
{
STXXL_ERRMSG("DbException happened: " << e.what());
}
catch (std::exception & e)
{
STXXL_ERRMSG("std::exception happened: " << e.what());
}
unlink(filename);
#ifdef BDB_BULK_SCAN
delete[] bulk_buffer;
#endif
}
int main(int argc, char * argv[])
{
STXXL_MSG("stxxl::map Real Node block size: " << REAL_NODE_BLOCK_SIZE << " bytes");
STXXL_MSG("stxxl::map Real Leaf block size: " << REAL_LEAF_BLOCK_SIZE << " bytes");
STXXL_MSG("stxxl::map Node max elements : " << REAL_NODE_MELEMENTS);
STXXL_MSG("stxxl::map Leaf max elements : " << REAL_LEAF_MELEMENTS);
#ifdef STXXL_DIRECT_IO_OFF
STXXL_MSG("STXXL_DIRECT_IO_OFF is defined");
#else
STXXL_MSG("STXXL_DIRECT_IO_OFF is NOT defined");
#endif
if (argc < 3)
{
STXXL_MSG("Usage: " << argv[0] << " version #ops");
STXXL_MSG("\t version = 1: test stxxl map");
STXXL_MSG("\t version = 2: test Berkeley DB btree");
STXXL_MSG("\t version = 3: big test stxxl map");
STXXL_MSG("\t version = 4: big test Berkeley DB btree");
STXXL_MSG("\t version = 5: big test TPIE btree");
return -1;
}
init();
int version = atoi(argv[1]);
stxxl::int64 ops = stxxl::atoint64(argv[2]);
STXXL_MSG("Running version : " << version);
STXXL_MSG("Operations to perform: " << ops);
STXXL_MSG("Btree cache size : " << TOTAL_CACHE_SIZE << " bytes");
STXXL_MSG("Leaf block size : " << LEAF_BLOCK_SIZE << " bytes");
switch (version)
{
case 1:
run_stxxl_map(ops);
break;
case 2:
run_bdb_btree(ops);
break;
case 3:
run_stxxl_map_big(ops, 100000);
break;
case 4:
run_bdb_btree_big(ops, 100000);
break;
case 5:
run_tpie_btree_big(ops, 100000);
break;
default:
STXXL_MSG("Unsupported version " << version);
}
}