Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 13:58:55 +01:00
parent 4af19165ec
commit 68073add76
12458 changed files with 12350765 additions and 2 deletions

4
3party/succinct/perftest/.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
perftest_bp_vector
perftest_bp_vector_rmq
perftest_cartesian_tree
perftest_elias_fano

View file

@ -0,0 +1,9 @@
# Build one executable per source file matching perftest_*.cpp.
file(GLOB SUCCINCT_TEST_SOURCES perftest_*.cpp)
foreach(TEST_SRC ${SUCCINCT_TEST_SOURCES})
# NAME_WE yields the file name without directory or extension; it is used
# as the executable's target name.
get_filename_component (TEST_SRC_NAME ${TEST_SRC} NAME_WE)
add_executable(${TEST_SRC_NAME} ${TEST_SRC})
# Each executable links against `succinct` and whatever Boost_LIBRARIES holds.
target_link_libraries(${TEST_SRC_NAME}
succinct
${Boost_LIBRARIES}
)
endforeach(TEST_SRC)

View file

@ -0,0 +1,119 @@
#include <iostream>
#include <vector>
#include <boost/lexical_cast.hpp>
#include "util.hpp"
#include "test_bp_vector_common.hpp"
#include "bp_vector.hpp"
#include "mapper.hpp"
#include "perftest_common.hpp"
// this generic trait enables easy comparisons with other BP
// implementations
struct succinct_bp_vector_traits
{
typedef succinct::bit_vector_builder builder_type;
typedef succinct::bp_vector bp_vector_type;
static inline void build(builder_type& builder, bp_vector_type& bp)
{
bp_vector_type(&builder, true, false).swap(bp);
}
static inline std::string log_header()
{
return std::string("SUCCINCT");
}
static inline double bits_per_bp(bp_vector_type& vec)
{
return double(succinct::mapper::size_of(vec))
* 8.0 / double(vec.size());
}
};
// Times random root-to-leaf descents over `bp`.
//
// One random left/right decision per step is drawn up front so that rand()
// is not part of the timed region. Each descent starts at position 1 and
// continues while the current position holds a set bit: on a "right"
// decision it jumps just past the matching close (one find_close call),
// otherwise it moves to the next position. Descents restart from the root
// until `sample_size` steps have been consumed.
//
// Returns the elapsed time reported by SUCCINCT_TIMEIT divided by the number
// of find_close calls performed.
template <typename BpVector>
double time_visit(BpVector const& bp, size_t sample_size = 1000000)
{
    std::vector<char> go_right;
    for (size_t k = 0; k != sample_size; ++k) {
        go_right.push_back(rand() > (RAND_MAX / 2));
    }

    volatile size_t sink = 0; // keeps the compiler from discarding the loop
    size_t closes = 0;
    size_t step = 0;
    double elapsed;

    SUCCINCT_TIMEIT(elapsed) {
        while (step < sample_size) {
            size_t pos = 1; // root
            while (bp[pos] && step < sample_size) {
                const bool take_right = go_right[step] != 0;
                ++step;
                if (!take_right) {
                    ++pos;
                    continue;
                }
                pos = bp.find_close(pos) + 1;
                ++closes;
            }
            sink = pos;
        }
    }

    (void)sink; // silence warning
    return elapsed / double(closes);
}
// Fills `bp` with a random binary tree: succinct::random_binary_tree writes
// into a fresh builder (receiving `size` as its size argument) and the
// traits' build() turns that builder into the final vector.
template <typename BpVectorTraits>
void build_random_binary_tree(typename BpVectorTraits::bp_vector_type& bp, size_t size)
{
    typedef typename BpVectorTraits::builder_type builder_t;

    builder_t tree_bits;
    succinct::random_binary_tree(tree_bits, size);
    BpVectorTraits::build(tree_bits, bp);
}
// Runs the find_close benchmark for tree sizes 2^10, 2^12, ..., 2^28.
//
// For every size, `runs` random trees are built and measured; the averages
// of time_visit() and of the traits' bits_per_bp() are printed as one
// tab-separated line on std::cout, preceded by a header.
template <typename BpVectorTraits>
void bp_benchmark(size_t runs)
{
    srand(42); // fixed seed: every invocation draws the same random inputs
    static const size_t sample_size = 10000000;

    std::cout << BpVectorTraits::log_header() << std::endl;
    std::cout << "log_height" "\t" "find_close_us" "\t" "bits_per_bp" << std::endl;

    size_t log_n = 10;
    while (log_n <= 28) {
        const size_t tree_size = 1 << log_n;
        double total_elapsed = 0;
        double total_bits = 0;

        for (size_t done = 0; done != runs; ++done) {
            typename BpVectorTraits::bp_vector_type bp;
            build_random_binary_tree<BpVectorTraits>(bp, tree_size);
            total_elapsed += time_visit(bp, sample_size);
            total_bits += BpVectorTraits::bits_per_bp(bp);
        }

        const double mean_elapsed = total_elapsed / double(runs);
        const double mean_bits = total_bits / double(runs);
        std::cout << log_n
                  << "\t" << mean_elapsed
                  << "\t" << mean_bits
                  << std::endl;

        log_n += 2;
    }
}
// Entry point: an optional single argument gives the number of runs per
// tree size; with any other argument count one run is performed.
int main(int argc, char** argv)
{
    const size_t runs = (argc == 2)
        ? boost::lexical_cast<size_t>(argv[1])
        : size_t(1);
    bp_benchmark<succinct_bp_vector_traits>(runs);
}

View file

@ -0,0 +1,77 @@
#include <iostream>
#include <vector>
#include <boost/lexical_cast.hpp>
#include "util.hpp"
#include "test_bp_vector_common.hpp"
#include "bp_vector.hpp"
#include "perftest_common.hpp"
// Measures the average cost of bp.excess_rmq over random ranges.
//
// `sample_size` ranges [a, b] are drawn before timing starts, with a in
// [0, bp.size()) and b in [a, bp.size()). `bp` must therefore be non-empty,
// otherwise the modulo below divides by zero.
// NOTE(review): positions come from rand(), so they never exceed RAND_MAX;
// on platforms with a small RAND_MAX only a prefix of large vectors is hit.
//
// Returns the elapsed time reported by SUCCINCT_TIMEIT divided by the number
// of queries performed.
double time_avg_rmq(succinct::bp_vector const& bp, size_t sample_size = 1000000)
{
    typedef std::pair<uint64_t, uint64_t> range_pair;

    std::vector<range_pair> pairs_sample;
    pairs_sample.reserve(sample_size); // size is known: avoid regrowth
    for (size_t i = 0; i < sample_size; ++i) {
        uint64_t a = uint64_t(rand()) % bp.size();
        uint64_t b = a + (uint64_t(rand()) % (bp.size() - a));
        pairs_sample.push_back(range_pair(a, b));
    }

    // Sink that prevents the compiler from optimizing away the loop. It is
    // initialized so that the read below is well defined even when no query
    // is executed.
    volatile uint64_t foo = 0;
    size_t rmq_performed = 0;
    double elapsed;
    SUCCINCT_TIMEIT(elapsed) {
        for (size_t i = 0; i < pairs_sample.size(); ++i) {
            range_pair r = pairs_sample[i];
            foo = bp.excess_rmq(r.first, r.second);
            rmq_performed += 1;
        }
    }
    (void)foo; // silence warning

    return elapsed / double(rmq_performed);
}
// Replaces the contents of `bp` with a bp_vector built from the output of
// succinct::random_binary_tree, which receives `size` as its size argument.
void build_random_binary_tree(succinct::bp_vector& bp, size_t size)
{
    succinct::bit_vector_builder tree_bits;
    succinct::random_binary_tree(tree_bits, size);

    succinct::bp_vector built(&tree_bits, true, false);
    built.swap(bp);
}
// Runs the excess_rmq benchmark for tree sizes 2^10, 2^12, ..., 2^28.
//
// For every size, `runs` random trees are built and the average of
// time_avg_rmq() is printed on std::cout next to the base-2 logarithm of the
// size, preceded by a header.
void rmq_benchmark(size_t runs)
{
    srand(42); // fixed seed: every invocation draws the same random inputs
    static const size_t sample_size = 10000000;

    std::cout << "SUCCINCT_EXCESS_RMQ" << std::endl;
    std::cout << "log_height" "\t" "excess_rmq_us" << std::endl;

    size_t log_n = 10;
    while (log_n <= 28) {
        const size_t tree_size = 1 << log_n;
        double total_elapsed = 0;

        for (size_t done = 0; done != runs; ++done) {
            succinct::bp_vector bp;
            build_random_binary_tree(bp, tree_size);
            total_elapsed += time_avg_rmq(bp, sample_size);
        }

        const double mean_elapsed = total_elapsed / double(runs);
        std::cout << log_n << "\t" << mean_elapsed << std::endl;

        log_n += 2;
    }
}
// Entry point: an optional single argument gives the number of runs per
// tree size; with any other argument count one run is performed.
int main(int argc, char** argv)
{
    const size_t runs = (argc == 2)
        ? boost::lexical_cast<size_t>(argv[1])
        : size_t(1);
    rmq_benchmark(runs);
}

View file

@ -0,0 +1,74 @@
#include <iostream>
#include <vector>
#include <boost/lexical_cast.hpp>
#include "util.hpp"
#include "test_bp_vector_common.hpp"
#include "cartesian_tree.hpp"
#include "perftest_common.hpp"
// Measures the average cost of tree.rmq over random ranges.
//
// `sample_size` ranges [a, b] are drawn before timing starts, with a in
// [0, tree.size()) and b in [a, tree.size()). `tree` must therefore be
// non-empty, otherwise the modulo below divides by zero.
// NOTE(review): positions come from rand(), so they never exceed RAND_MAX;
// on platforms with a small RAND_MAX only a prefix of large inputs is hit.
//
// Returns the elapsed time reported by SUCCINCT_TIMEIT divided by the number
// of queries performed.
double time_avg_rmq(succinct::cartesian_tree const& tree, size_t sample_size = 1000000)
{
    typedef std::pair<uint64_t, uint64_t> range_pair;

    std::vector<range_pair> pairs_sample;
    pairs_sample.reserve(sample_size); // size is known: avoid regrowth
    for (size_t i = 0; i < sample_size; ++i) {
        uint64_t a = uint64_t(rand()) % tree.size();
        uint64_t b = a + (uint64_t(rand()) % (tree.size() - a));
        pairs_sample.push_back(range_pair(a, b));
    }

    // Sink that prevents the compiler from optimizing away the loop. It is
    // initialized so that the read below is well defined even when no query
    // is executed.
    volatile uint64_t foo = 0;
    size_t rmq_performed = 0;
    double elapsed;
    SUCCINCT_TIMEIT(elapsed) {
        for (size_t i = 0; i < pairs_sample.size(); ++i) {
            range_pair r = pairs_sample[i];
            foo = tree.rmq(r.first, r.second);
            rmq_performed += 1;
        }
    }
    (void)foo; // silence warning

    return elapsed / double(rmq_performed);
}
// Runs the cartesian-tree rmq benchmark for input sizes 2^10, 2^12, ..., 2^28.
//
// For every size, `runs` vectors of random values in [0, 1024) are turned
// into a cartesian_tree and the average of time_avg_rmq() is printed on
// std::cout next to the base-2 logarithm of the size, preceded by a header.
void rmq_benchmark(size_t runs)
{
    srand(42); // fixed seed: every invocation draws the same random inputs
    static const size_t sample_size = 10000000;

    std::cout << "SUCCINCT_CARTESIAN_TREE_RMQ" << std::endl;
    std::cout << "log_height" "\t" "excess_rmq_us" << std::endl;

    size_t log_n = 10;
    while (log_n <= 28) {
        const size_t input_size = 1 << log_n;
        double total_elapsed = 0;

        for (size_t done = 0; done != runs; ++done) {
            std::vector<uint64_t> values(input_size);
            for (size_t pos = 0; pos < values.size(); ++pos) {
                values[pos] = uint64_t(rand()) % 1024;
            }

            succinct::cartesian_tree tree(values);
            total_elapsed += time_avg_rmq(tree, sample_size);
        }

        const double mean_elapsed = total_elapsed / double(runs);
        std::cout << log_n << "\t" << mean_elapsed << std::endl;

        log_n += 2;
    }
}
// Entry point: an optional single argument gives the number of runs per
// input size; with any other argument count one run is performed.
int main(int argc, char** argv)
{
    const size_t runs = (argc == 2)
        ? boost::lexical_cast<size_t>(argv[1])
        : size_t(1);
    rmq_benchmark(runs);
}

View file

@ -0,0 +1,33 @@
#pragma once
#include <boost/date_time/posix_time/posix_time_types.hpp>
namespace succinct {
namespace detail {

    // Stopwatch used by the SUCCINCT_TIMEIT macro. The start time is taken
    // at construction; report() stores the time elapsed since then and
    // flips the done flag.
    struct timer {
        timer()
            : m_tick(boost::posix_time::microsec_clock::universal_time())
            , m_done(false)
        {}

        // True once report() has been called.
        bool done() const { return m_done; }

        // Writes into `elapsed` the number of microseconds between
        // construction and this call, then marks the timer as done.
        void report(double& elapsed) {
            elapsed = static_cast<double>(
                (boost::posix_time::microsec_clock::universal_time() - m_tick)
                    .total_microseconds());
            m_done = true;
        }

        // NOTE(review): never written or read by this struct; kept only so
        // that the set of members stays unchanged. std::string is used here
        // although this header does not include <string> itself.
        const std::string m_msg;
        boost::posix_time::ptime m_tick; // time of construction
        bool m_done;                     // set by report()
    };

}
}
// SUCCINCT_TIMEIT(elapsed) <statement>
//
// Runs the statement (or block) that follows exactly once and stores the
// time it took, in microseconds, into `elapsed` (a double lvalue).
//
// It expands to a for-loop header: the init clause constructs a
// ::succinct::detail::timer, the condition is true until the timer is done,
// and the increment expression calls report(elapsed), which also marks the
// timer as done. Because the result is written by the increment expression,
// leaving the body with `break`, `return` or an exception leaves `elapsed`
// unwritten.
#define SUCCINCT_TIMEIT(elapsed) \
for (::succinct::detail::timer SUCCINCT_TIMEIT_timer; \
!SUCCINCT_TIMEIT_timer.done(); \
SUCCINCT_TIMEIT_timer.report(elapsed)) \
/**/

View file

@ -0,0 +1,114 @@
#include <iostream>
#include <vector>
#include <boost/lexical_cast.hpp>
#include <boost/tuple/tuple.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include "util.hpp"
#include "elias_fano.hpp"
#include "mapper.hpp"
#include "perftest_common.hpp"
struct monotone_generator
{
monotone_generator(uint64_t m, uint8_t bits, unsigned int seed)
: m_gen(seed)
, m_bits(bits)
{
m_stack.push_back(state_t(0, m, 0));
}
uint64_t next()
{
uint64_t cur_word, cur_m;
uint8_t cur_depth;
assert(m_stack.size());
boost::tie(cur_word, cur_m, cur_depth) = m_stack.back();
m_stack.pop_back();
while (cur_depth < m_bits) {
boost::random::uniform_int_distribution<uint64_t> dist(0, cur_m);
uint64_t left_m = dist(m_gen);
uint64_t right_m = cur_m - left_m;
// push left and right children, if present
if (right_m > 0) {
m_stack.push_back(state_t(cur_word | (uint64_t(1) << (m_bits - cur_depth - 1)),
right_m, cur_depth + 1));
}
if (left_m > 0) {
m_stack.push_back(state_t(cur_word, left_m, cur_depth + 1));
}
// pop next child in visit
boost::tie(cur_word, cur_m, cur_depth) = m_stack.back();
m_stack.pop_back();
}
if (cur_m > 1) {
// push back the current leaf, with cur_m decreased by one
m_stack.push_back(state_t(cur_word, cur_m - 1, cur_depth));
}
return cur_word;
}
bool done() const
{
return m_stack.empty();
}
private:
typedef boost::tuple<uint64_t /* cur_word */,
uint64_t /* cur_m */,
uint64_t /* cur_depth */> state_t;
std::vector<state_t> m_stack;
boost::random::mt19937 m_gen;
uint8_t m_bits;
};
void ef_enumeration_benchmark(uint64_t m, uint8_t bits)
{
succinct::elias_fano::elias_fano_builder bvb(uint64_t(1) << bits, m);
monotone_generator mgen(m, bits, 37);
for (size_t i = 0; i < m; ++i) {
bvb.push_back(mgen.next());
}
assert(mgen.done());
succinct::elias_fano ef(&bvb);
succinct::mapper::size_tree_of(ef)->dump();
double elapsed;
uint64_t foo = 0;
SUCCINCT_TIMEIT(elapsed) {
succinct::elias_fano::select_enumerator it(ef, 0);
for (size_t i = 0; i < m; ++i) {
foo ^= it.next();
}
}
volatile uint64_t vfoo = foo;
(void)vfoo; // silence warning
std::cerr << "Elapsed: " << elapsed / 1000 << " msec\n"
<< double(m) / elapsed << " Mcodes/s" << std::endl;
}
int main(int argc, char** argv)
{
if (argc != 3) {
std::cerr << "Invalid arguments" << std::endl;
std::terminate();
}
size_t m = boost::lexical_cast<uint64_t>(argv[1]);
uint8_t bits = uint8_t(boost::lexical_cast<int>(argv[2]));
ef_enumeration_benchmark(m, bits);
}