Repo created
This commit is contained in:
parent
4af19165ec
commit
68073add76
12458 changed files with 12350765 additions and 2 deletions
107
libs/coding/CMakeLists.txt
Normal file
107
libs/coding/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
project(coding)
|
||||
|
||||
set(SRC
|
||||
base64.cpp
|
||||
base64.hpp
|
||||
bit_streams.hpp
|
||||
buffer_reader.hpp
|
||||
buffered_file_writer.cpp
|
||||
buffered_file_writer.hpp
|
||||
bwt.cpp
|
||||
bwt.hpp
|
||||
bwt_coder.hpp
|
||||
byte_stream.hpp
|
||||
compressed_bit_vector.cpp
|
||||
compressed_bit_vector.hpp
|
||||
constants.hpp
|
||||
csv_reader.cpp
|
||||
csv_reader.hpp
|
||||
dd_vector.hpp
|
||||
diff.hpp
|
||||
elias_coder.hpp
|
||||
endianness.hpp
|
||||
file_reader.cpp
|
||||
file_reader.hpp
|
||||
file_sort.hpp
|
||||
file_writer.cpp
|
||||
file_writer.hpp
|
||||
files_container.cpp
|
||||
files_container.hpp
|
||||
fixed_bits_ddvector.hpp
|
||||
geometry_coding.cpp
|
||||
geometry_coding.hpp
|
||||
hex.cpp
|
||||
hex.hpp
|
||||
huffman.cpp
|
||||
huffman.hpp
|
||||
internal/file64_api.hpp
|
||||
internal/file_data.cpp
|
||||
internal/file_data.hpp
|
||||
internal/xmlparser.hpp
|
||||
map_uint32_to_val.hpp
|
||||
memory_region.hpp
|
||||
mmap_reader.cpp
|
||||
mmap_reader.hpp
|
||||
move_to_front.cpp
|
||||
move_to_front.hpp
|
||||
parse_xml.hpp
|
||||
point_coding.cpp
|
||||
point_coding.hpp
|
||||
read_write_utils.hpp
|
||||
reader.cpp
|
||||
reader.hpp
|
||||
reader_cache.hpp
|
||||
reader_streambuf.cpp
|
||||
reader_streambuf.hpp
|
||||
reader_wrapper.hpp
|
||||
reader_writer_ops.cpp
|
||||
reader_writer_ops.hpp
|
||||
serdes_binary_header.hpp
|
||||
serdes_json.hpp
|
||||
sha1.cpp
|
||||
sha1.hpp
|
||||
simple_dense_coding.cpp
|
||||
simple_dense_coding.hpp
|
||||
sparse_vector.hpp
|
||||
streams.hpp
|
||||
streams_common.hpp
|
||||
streams_sink.hpp
|
||||
string_utf8_multilang.cpp
|
||||
string_utf8_multilang.hpp
|
||||
succinct_mapper.hpp
|
||||
tesselator_decl.hpp
|
||||
text_storage.hpp
|
||||
traffic.cpp
|
||||
traffic.hpp
|
||||
transliteration.cpp
|
||||
transliteration.hpp
|
||||
url.cpp
|
||||
url.hpp
|
||||
value_opt_string.hpp
|
||||
var_record_reader.hpp
|
||||
var_serial_vector.hpp
|
||||
varint.hpp
|
||||
write_to_sink.hpp
|
||||
writer.hpp
|
||||
zip_creator.cpp
|
||||
zip_creator.hpp
|
||||
zip_reader.cpp
|
||||
zip_reader.hpp
|
||||
zlib.cpp
|
||||
zlib.hpp
|
||||
)
|
||||
|
||||
omim_add_library(${PROJECT_NAME} ${SRC})
|
||||
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
base
|
||||
expat::expat
|
||||
cppjansson
|
||||
succinct
|
||||
ICU::uc
|
||||
ICU::i18n # For transliteration.
|
||||
minizip
|
||||
ZLIB::ZLIB
|
||||
)
|
||||
|
||||
omim_add_test_subdirectory(coding_tests)
|
||||
41
libs/coding/base64.cpp
Normal file
41
libs/coding/base64.cpp
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
#include "coding/base64.hpp"
|
||||
|
||||
// Silence warnings raised inside the Boost headers included below.
// Clang defines __GNUC__ as well, so __clang__ has to be tested first; otherwise the
// Clang-specific branch is never selected. Clang keeps the "-Wreorder" suppression it
// previously got through the GCC branch.
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreorder"
#pragma clang diagnostic ignored "-Wunused-local-typedef"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wreorder"
#endif
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/archive/iterators/base64_from_binary.hpp>
|
||||
#include <boost/archive/iterators/binary_from_base64.hpp>
|
||||
#include <boost/archive/iterators/transform_width.hpp>
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
#elif defined(__clang__)
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace base64
|
||||
{
|
||||
// From: http://stackoverflow.com/a/28471421
|
||||
|
||||
std::string Decode(std::string const & val)
|
||||
{
|
||||
using namespace boost::archive::iterators;
|
||||
using It = transform_width<binary_from_base64<std::string::const_iterator>, 8, 6>;
|
||||
return boost::algorithm::trim_right_copy_if(std::string(It(std::begin(val)), It(std::end(val))),
|
||||
[](char c) { return c == '\0'; });
|
||||
}
|
||||
|
||||
std::string Encode(std::string_view val)
|
||||
{
|
||||
using namespace boost::archive::iterators;
|
||||
using It = base64_from_binary<transform_width<std::string_view::const_iterator, 6, 8>>;
|
||||
auto tmp = std::string(It(std::begin(val)), It(std::end(val)));
|
||||
return tmp.append((3 - val.size() % 3) % 3, '=');
|
||||
}
|
||||
} // namespace base64
|
||||
9
libs/coding/base64.hpp
Normal file
9
libs/coding/base64.hpp
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
// Base64 conversion helpers.
namespace base64
|
||||
{
|
||||
// Returns the base64 text for the bytes in |bytesToEncode|.
std::string Encode(std::string_view bytesToEncode);
|
||||
// Returns the bytes encoded by the base64 text |base64CharsToDecode|.
std::string Decode(std::string const & base64CharsToDecode);
|
||||
} // namespace base64
|
||||
217
libs/coding/bit_streams.hpp
Normal file
217
libs/coding/bit_streams.hpp
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
#pragma once
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/bits.hpp"
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
|
||||
static_assert(CHAR_BIT == 8);
|
||||
|
||||
template <typename TWriter>
|
||||
class BitWriter
|
||||
{
|
||||
static uint8_t constexpr kMinBits = CHAR_BIT;
|
||||
|
||||
public:
|
||||
explicit BitWriter(TWriter & writer) : m_writer(writer), m_buf(0), m_bitsWritten(0) {}
|
||||
|
||||
~BitWriter()
|
||||
{
|
||||
try
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
LOG(LWARNING, ("Caught an exception when flushing BitWriter."));
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the number of bits that have been sent to BitWriter,
|
||||
// including those that are in m_buf and are possibly not flushed
|
||||
// yet.
|
||||
uint64_t BitsWritten() const { return m_bitsWritten; }
|
||||
|
||||
// Writes n bits starting with the least significant bit. They are
|
||||
// written one byte at a time so endianness is of no concern.
|
||||
void Write(uint8_t bits, uint8_t n)
|
||||
{
|
||||
if (n == 0)
|
||||
return;
|
||||
|
||||
bits = bits & bits::GetFullMask(n);
|
||||
|
||||
ASSERT_LESS_OR_EQUAL(n, CHAR_BIT, ());
|
||||
uint32_t bufferedBits = m_bitsWritten % CHAR_BIT;
|
||||
m_bitsWritten += n;
|
||||
if (n + bufferedBits > CHAR_BIT)
|
||||
{
|
||||
uint8_t b = (bits << bufferedBits) | m_buf;
|
||||
m_writer.Write(&b, 1);
|
||||
m_buf = bits >> (CHAR_BIT - bufferedBits);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bufferedBits > 0)
|
||||
{
|
||||
bits = (bits << bufferedBits) | m_buf;
|
||||
n += bufferedBits;
|
||||
}
|
||||
if (n == CHAR_BIT)
|
||||
{
|
||||
m_writer.Write(&bits, 1);
|
||||
bits = 0;
|
||||
}
|
||||
m_buf = bits;
|
||||
}
|
||||
}
|
||||
|
||||
#define WRITE_BYTE() \
|
||||
{ \
|
||||
Write(bits, std::min(kMinBits, n)); \
|
||||
if (n <= kMinBits) \
|
||||
return; \
|
||||
n -= kMinBits; \
|
||||
bits >>= kMinBits; \
|
||||
}
|
||||
|
||||
// Same as Write but accept up to 32 bits to write.
|
||||
void WriteAtMost32Bits(uint32_t bits, uint8_t n)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(n, 32, ());
|
||||
|
||||
WRITE_BYTE();
|
||||
WRITE_BYTE();
|
||||
WRITE_BYTE();
|
||||
|
||||
Write(bits, n);
|
||||
}
|
||||
|
||||
// Same as Write but accept up to 64 bits to write.
|
||||
void WriteAtMost64Bits(uint64_t bits, uint8_t n)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(n, 64, ());
|
||||
|
||||
WRITE_BYTE();
|
||||
WRITE_BYTE();
|
||||
WRITE_BYTE();
|
||||
WRITE_BYTE();
|
||||
WRITE_BYTE();
|
||||
WRITE_BYTE();
|
||||
WRITE_BYTE();
|
||||
|
||||
Write(bits, n);
|
||||
}
|
||||
|
||||
#undef WRITE_BYTE
|
||||
|
||||
private:
|
||||
// Writes up to CHAR_BIT-1 last bits if they have not been written
|
||||
// yet and pads them with zeros. This method cannot be made public
|
||||
// because once a byte has been flushed there is no going back.
|
||||
void Flush()
|
||||
{
|
||||
if (m_bitsWritten % CHAR_BIT != 0)
|
||||
m_writer.Write(&m_buf, 1);
|
||||
}
|
||||
|
||||
TWriter & m_writer;
|
||||
uint8_t m_buf;
|
||||
uint64_t m_bitsWritten;
|
||||
};
|
||||
|
||||
template <typename TSource>
|
||||
class BitReader
|
||||
{
|
||||
static uint8_t constexpr kMinBits = CHAR_BIT;
|
||||
|
||||
public:
|
||||
explicit BitReader(TSource & src) : m_src(src), m_bitsRead(0), m_bufferedBits(0), m_buf(0) {}
|
||||
|
||||
// Returns the total number of bits read from this BitReader.
|
||||
uint64_t BitsRead() const { return m_bitsRead; }
|
||||
|
||||
// Reads n bits and returns them as the least significant bits of an
|
||||
// 8-bit number. The underlying m_src is supposed to be
|
||||
// byte-aligned (which is the case when it reads from the place that
|
||||
// was written to using BitWriter). Read may use one lookahead
|
||||
// byte.
|
||||
uint8_t Read(uint8_t n)
|
||||
{
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
uint8_t constexpr kByteMask = 0xFF;
|
||||
|
||||
ASSERT_LESS_OR_EQUAL(n, CHAR_BIT, ());
|
||||
m_bitsRead += n;
|
||||
uint8_t result = 0;
|
||||
if (n <= m_bufferedBits)
|
||||
{
|
||||
result = m_buf & (kByteMask >> (CHAR_BIT - n));
|
||||
m_bufferedBits -= n;
|
||||
m_buf >>= n;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint8_t nextByte;
|
||||
m_src.Read(&nextByte, 1);
|
||||
uint32_t low = n - m_bufferedBits;
|
||||
result = ((nextByte & (kByteMask >> (CHAR_BIT - low))) << m_bufferedBits) | m_buf;
|
||||
m_buf = nextByte >> low;
|
||||
m_bufferedBits = CHAR_BIT - low;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#define READ_BYTE(i) \
|
||||
{ \
|
||||
result = result | (static_cast<decltype(result)>(Read(std::min(n, kMinBits))) << (i * kMinBits)); \
|
||||
if (n <= kMinBits) \
|
||||
return result; \
|
||||
n -= kMinBits; \
|
||||
}
|
||||
|
||||
// Same as Read but accept up to 32 bits to read.
|
||||
uint32_t ReadAtMost32Bits(uint8_t n)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(n, 32, ());
|
||||
|
||||
uint32_t result = 0;
|
||||
|
||||
READ_BYTE(0);
|
||||
READ_BYTE(1);
|
||||
READ_BYTE(2);
|
||||
|
||||
return result | (static_cast<uint32_t>(Read(n)) << (3 * kMinBits));
|
||||
}
|
||||
|
||||
// Same as Read but accept up to 64 bits to read.
|
||||
uint64_t ReadAtMost64Bits(uint8_t n)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(n, 64, ());
|
||||
|
||||
uint64_t result = 0;
|
||||
|
||||
READ_BYTE(0);
|
||||
READ_BYTE(1);
|
||||
READ_BYTE(2);
|
||||
READ_BYTE(3);
|
||||
READ_BYTE(4);
|
||||
READ_BYTE(5);
|
||||
READ_BYTE(6);
|
||||
|
||||
return result | (static_cast<uint64_t>(Read(n)) << (7 * kMinBits));
|
||||
}
|
||||
|
||||
#undef READ_BYTE
|
||||
|
||||
private:
|
||||
TSource & m_src;
|
||||
uint64_t m_bitsRead;
|
||||
uint32_t m_bufferedBits;
|
||||
uint8_t m_buf;
|
||||
};
|
||||
70
libs/coding/buffer_reader.hpp
Normal file
70
libs/coding/buffer_reader.hpp
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
/// Reader from buffer with ownership on it, but cheap copy constructor.
|
||||
class BufferReader : public Reader
|
||||
{
|
||||
public:
|
||||
template <class ReaderT>
|
||||
explicit BufferReader(ReaderT const & reader, uint64_t offset = 0)
|
||||
{
|
||||
uint64_t const rSize = reader.Size();
|
||||
ASSERT_LESS_OR_EQUAL(offset, rSize, (offset, rSize));
|
||||
|
||||
InitBuffer(static_cast<size_t>(rSize - offset));
|
||||
reader.Read(offset, m_data.get(), m_size);
|
||||
}
|
||||
|
||||
explicit BufferReader(char const * p, size_t count)
|
||||
{
|
||||
InitBuffer(count);
|
||||
memcpy(m_data.get(), p, count);
|
||||
}
|
||||
|
||||
uint64_t Size() const { return m_size; }
|
||||
|
||||
void Read(uint64_t pos, void * p, size_t size) const
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(pos + size, Size(), (pos, size));
|
||||
memcpy(p, m_data.get() + static_cast<size_t>(pos) + m_offset, size);
|
||||
}
|
||||
|
||||
BufferReader SubReader(uint64_t pos, uint64_t size) const { return BufferReader(*this, pos, size); }
|
||||
|
||||
std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const
|
||||
{
|
||||
// Can't use make_unique with private constructor.
|
||||
return std::unique_ptr<Reader>(new BufferReader(*this, pos, size));
|
||||
}
|
||||
|
||||
private:
|
||||
BufferReader(BufferReader const & src, uint64_t pos, uint64_t size) : m_data(src.m_data)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(pos + size, src.Size(), (pos, size));
|
||||
|
||||
m_offset = static_cast<size_t>(src.m_offset + pos);
|
||||
m_size = static_cast<size_t>(size);
|
||||
}
|
||||
|
||||
void InitBuffer(size_t count)
|
||||
{
|
||||
m_offset = 0;
|
||||
m_size = count;
|
||||
m_data.reset(new char[m_size], Deleter());
|
||||
}
|
||||
|
||||
size_t m_offset, m_size;
|
||||
|
||||
struct Deleter
|
||||
{
|
||||
void operator()(char * p) { delete[] p; }
|
||||
};
|
||||
|
||||
std::shared_ptr<char> m_data;
|
||||
};
|
||||
75
libs/coding/buffered_file_writer.cpp
Normal file
75
libs/coding/buffered_file_writer.cpp
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
#include "coding/buffered_file_writer.hpp"
|
||||
|
||||
#include "coding/internal/file_data.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
// Opens |fileName| through FileWriter and reserves |bufferSize| bytes for the in-memory
// buffer. |bufferSize| must be greater than zero.
BufferedFileWriter::BufferedFileWriter(std::string const & fileName, Op operation /* = OP_WRITE_TRUNCATE */,
|
||||
size_t bufferSize /* = 4096 */)
|
||||
: FileWriter(fileName, operation)
|
||||
{
|
||||
CHECK_GREATER(bufferSize, 0, ());
|
||||
m_buf.reserve(bufferSize);
|
||||
}
|
||||
|
||||
// Writes out whatever is still buffered. Declared noexcept(false), so an exception from
// that final write is allowed to leave the destructor.
BufferedFileWriter::~BufferedFileWriter() noexcept(false)
|
||||
{
|
||||
DropBuffer();
|
||||
}
|
||||
|
||||
// Moves the write position to |pos|.
void BufferedFileWriter::Seek(uint64_t pos)
|
||||
{
|
||||
// Buffered bytes belong to the old position, so they are written out before seeking.
DropBuffer();
|
||||
FileWriter::Seek(pos);
|
||||
}
|
||||
|
||||
uint64_t BufferedFileWriter::Pos() const
|
||||
{
|
||||
return FileWriter::Pos() + m_buf.size();
|
||||
}
|
||||
|
||||
void BufferedFileWriter::Write(void const * p, size_t size)
|
||||
{
|
||||
// Need to use pointer arithmetic.
|
||||
auto src = static_cast<uint8_t const *>(p);
|
||||
|
||||
while (size >= m_buf.capacity() - m_buf.size())
|
||||
{
|
||||
if (m_buf.empty())
|
||||
{
|
||||
FileWriter::Write(src, m_buf.capacity());
|
||||
src += m_buf.capacity();
|
||||
size -= m_buf.capacity();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto const copyCount = m_buf.capacity() - m_buf.size();
|
||||
std::copy(src, src + copyCount, std::back_inserter(m_buf));
|
||||
DropBuffer();
|
||||
src += copyCount;
|
||||
size -= copyCount;
|
||||
}
|
||||
}
|
||||
|
||||
std::copy(src, src + size, std::back_inserter(m_buf));
|
||||
}
|
||||
|
||||
uint64_t BufferedFileWriter::Size() const
|
||||
{
|
||||
return FileWriter::Size() + m_buf.size();
|
||||
}
|
||||
|
||||
// Hands the buffered bytes to FileWriter and then flushes FileWriter itself.
void BufferedFileWriter::Flush()
|
||||
{
|
||||
DropBuffer();
|
||||
FileWriter::Flush();
|
||||
}
|
||||
|
||||
void BufferedFileWriter::DropBuffer()
|
||||
{
|
||||
if (m_buf.empty())
|
||||
return;
|
||||
|
||||
FileWriter::Write(m_buf.data(), m_buf.size());
|
||||
m_buf.clear();
|
||||
}
|
||||
29
libs/coding/buffered_file_writer.hpp
Normal file
29
libs/coding/buffered_file_writer.hpp
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/file_writer.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// FileWriter that collects written bytes in an in-memory buffer and passes them on to
// FileWriter in larger portions.
class BufferedFileWriter : public FileWriter
|
||||
{
|
||||
public:
|
||||
// |bufferSize| is the number of bytes reserved for the buffer; it must be greater than zero.
explicit BufferedFileWriter(std::string const & fileName, Op operation = OP_WRITE_TRUNCATE, size_t bufferSize = 4096);
|
||||
|
||||
// Writes out the remaining buffered bytes; may throw.
~BufferedFileWriter() noexcept(false) override;
|
||||
|
||||
// Writer overrides:
|
||||
void Seek(uint64_t pos) override;
|
||||
uint64_t Pos() const override;
|
||||
void Write(void const * p, size_t size) override;
|
||||
|
||||
// FileWriter overrides:
|
||||
uint64_t Size() const override;
|
||||
void Flush() override;
|
||||
|
||||
private:
|
||||
// Writes the buffered bytes through FileWriter and clears the buffer.
void DropBuffer();
|
||||
|
||||
// Bytes accepted by Write() that have not been passed to FileWriter yet.
std::vector<uint8_t> m_buf;
|
||||
};
|
||||
193
libs/coding/bwt.cpp
Normal file
193
libs/coding/bwt.cpp
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
#include "coding/bwt.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/suffix_array.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
namespace
|
||||
{
|
||||
size_t constexpr kNumBytes = 256;
|
||||
|
||||
// Fake trailing '$' for the BWT, used for original string
|
||||
// reconstruction.
|
||||
uint32_t constexpr kEOS = 256;
|
||||
|
||||
// FirstColumn represents the first column in the BWT matrix. As
|
||||
// during reverse BWT we need to reconstruct canonical first column,
|
||||
// with '$' as the first element, this wrapper is used. Also note that
|
||||
// other characters in the first column are sorted, so we actually
|
||||
// don't need to store them explicitly, it's enough to store start
|
||||
// positions of the corresponding groups of consecutive characters.
|
||||
class FirstColumn
|
||||
{
|
||||
public:
|
||||
FirstColumn(size_t n, uint8_t const * s) : m_n(n), m_starts({})
|
||||
{
|
||||
m_starts.fill(0);
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
++m_starts[s[i]];
|
||||
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < m_starts.size(); ++i)
|
||||
{
|
||||
auto const count = m_starts[i];
|
||||
m_starts[i] = offset;
|
||||
offset += count;
|
||||
}
|
||||
}
|
||||
|
||||
size_t Size() const { return m_n + 1; }
|
||||
|
||||
uint32_t operator[](size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
if (i == 0)
|
||||
return kEOS;
|
||||
|
||||
--i;
|
||||
auto it = std::upper_bound(m_starts.begin(), m_starts.end(), i);
|
||||
ASSERT(it != m_starts.begin(), ());
|
||||
--it;
|
||||
return static_cast<uint32_t>(std::distance(m_starts.begin(), it));
|
||||
}
|
||||
|
||||
// Returns the rank of the i-th symbol among symbols with the same
|
||||
// value.
|
||||
size_t Rank(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
if (i == 0)
|
||||
return 0;
|
||||
|
||||
--i;
|
||||
auto it = std::upper_bound(m_starts.begin(), m_starts.end(), i);
|
||||
if (it == m_starts.begin())
|
||||
return i;
|
||||
--it;
|
||||
return i - *it;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t const m_n;
|
||||
std::array<size_t, kNumBytes> m_starts;
|
||||
};
|
||||
|
||||
// LastColumn represents the last column in the BWT matrix. As during
|
||||
// reverse BWT we need to reconstruct canonical last column, |s| is
|
||||
// replaced by s[start] + s[0, start) + '$' + s[start, n).
|
||||
class LastColumn
|
||||
{
|
||||
public:
|
||||
LastColumn(size_t n, size_t start, uint8_t const * s) : m_n(n), m_start(start), m_s(s)
|
||||
{
|
||||
for (size_t i = 0; i < Size(); ++i)
|
||||
{
|
||||
auto const b = (*this)[i];
|
||||
if (b == kEOS)
|
||||
continue;
|
||||
ASSERT_LESS(b, kNumBytes, ());
|
||||
m_table[b].push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
size_t Size() const { return m_n + 1; }
|
||||
|
||||
uint32_t operator[](size_t i) const
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
ASSERT_LESS(m_start, m_n, ());
|
||||
return m_s[m_start];
|
||||
}
|
||||
|
||||
if (i == m_start + 1)
|
||||
return kEOS;
|
||||
|
||||
ASSERT_LESS_OR_EQUAL(i, m_n, ());
|
||||
return m_s[i - 1];
|
||||
}
|
||||
|
||||
// Returns the index of the |rank|-th |byte| in the canonical BWT
|
||||
// last column.
|
||||
size_t Select(uint32_t byte, size_t rank)
|
||||
{
|
||||
if (byte == kEOS)
|
||||
{
|
||||
ASSERT_EQUAL(rank, 0, ());
|
||||
return 0;
|
||||
}
|
||||
|
||||
ASSERT_LESS(rank, m_table[byte].size(), (byte, rank));
|
||||
return m_table[byte][rank];
|
||||
}
|
||||
|
||||
private:
|
||||
size_t const m_n;
|
||||
size_t const m_start;
|
||||
uint8_t const * const m_s;
|
||||
std::array<std::vector<size_t>, kNumBytes> m_table;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace coding
|
||||
{
|
||||
size_t BWT(size_t n, uint8_t const * s, uint8_t * r)
|
||||
{
|
||||
std::vector<size_t> sa(n);
|
||||
base::Skew(n, s, sa.data());
|
||||
|
||||
size_t result = 0;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
if (sa[i] != 0)
|
||||
{
|
||||
r[i] = s[sa[i] - 1];
|
||||
}
|
||||
else
|
||||
{
|
||||
result = i;
|
||||
r[i] = s[n - 1];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t BWT(std::string const & s, std::string & r)
|
||||
{
|
||||
auto const n = s.size();
|
||||
r.assign(n, '\0');
|
||||
return BWT(n, reinterpret_cast<uint8_t const *>(s.data()), reinterpret_cast<uint8_t *>(&r[0]));
|
||||
}
|
||||
|
||||
void RevBWT(size_t n, size_t start, uint8_t const * s, uint8_t * r)
|
||||
{
|
||||
if (n == 0)
|
||||
return;
|
||||
|
||||
FirstColumn first(n, s);
|
||||
LastColumn last(n, start, s);
|
||||
|
||||
auto curr = start + 1;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
ASSERT_LESS(curr, first.Size(), ());
|
||||
ASSERT(first[curr] != kEOS, ());
|
||||
|
||||
r[i] = first[curr];
|
||||
curr = last.Select(r[i], first.Rank(curr));
|
||||
}
|
||||
|
||||
ASSERT_EQUAL(first[curr], kEOS, ());
|
||||
}
|
||||
|
||||
void RevBWT(size_t start, std::string const & s, std::string & r)
|
||||
{
|
||||
auto const n = s.size();
|
||||
r.assign(n, '\0');
|
||||
RevBWT(n, start, reinterpret_cast<uint8_t const *>(s.data()), reinterpret_cast<uint8_t *>(&r[0]));
|
||||
}
|
||||
} // namespace coding
|
||||
60
libs/coding/bwt.hpp
Normal file
60
libs/coding/bwt.hpp
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
// Computes the Burrows-Wheeler transform of the string |s|, stores
|
||||
// result in the string |r|. Note - the size of |r| must be |n|.
|
||||
// Returns the index of the original string among all the sorted
|
||||
// rotations of the |s|.
|
||||
//
|
||||
// *NOTE* in contrast to popular explanations of BWT, we do not append
|
||||
// to |s| trailing '$' that is less than any other character in |s|.
|
||||
// The reason is that |s| can be an arbitrary byte string, with zero
|
||||
// bytes inside, so implementation of this trailing '$' is expensive,
|
||||
// and, actually, not needed.
|
||||
//
|
||||
// For example, if |s| is "abaaba", canonical BWT is:
|
||||
//
|
||||
// Sorted rotations: canonical BWT:
|
||||
// $abaaba a
|
||||
// a$abaab b
|
||||
// aaba$ab b
|
||||
// aba$aba a
|
||||
// * abaaba$ $
|
||||
// ba$abaa a
|
||||
// baaba$a a
|
||||
//
|
||||
// where '*' denotes original string.
|
||||
//
|
||||
// Our implementation will sort rotations in a way as there is an
|
||||
// implicit '$' that is less than any other byte in |s|, but does not
|
||||
// return this '$'. Therefore, the order of rotations will be the same
|
||||
// as above, without the first '$abaaba':
|
||||
//
|
||||
// Sorted rotations: ours BWT:
|
||||
// aabaab b
|
||||
// aabaab b
|
||||
// abaaba a
|
||||
// * abaaba a
|
||||
// baabaa a
|
||||
// baabaa a
|
||||
//
|
||||
// where '*' denotes the index of original string. As one can see,
|
||||
// there are two 'abaaba' strings, but as mentioned, rotations are
|
||||
// sorted like there is an implicit '$' at the end of the original
|
||||
// string. It's possible to get from "ours BWT" to the "original BWT",
|
||||
// see the code for details.
|
||||
//
|
||||
// Complexity: O(n) time and O(n) memory.
|
||||
size_t BWT(size_t n, uint8_t const * s, uint8_t * r);
|
||||
size_t BWT(std::string const & s, std::string & r);
|
||||
|
||||
// Inverse Burrows-Wheeler transform.
|
||||
//
|
||||
// Complexity: O(n) time and O(n) memory.
|
||||
void RevBWT(size_t n, size_t start, uint8_t const * s, uint8_t * r);
|
||||
void RevBWT(size_t start, std::string const & s, std::string & r);
|
||||
} // namespace coding
|
||||
125
libs/coding/bwt_coder.hpp
Normal file
125
libs/coding/bwt_coder.hpp
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/bwt.hpp"
|
||||
#include "coding/huffman.hpp"
|
||||
#include "coding/move_to_front.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
class BWTCoder
|
||||
{
|
||||
public:
|
||||
using BufferT = std::vector<uint8_t>;
|
||||
|
||||
struct Params
|
||||
{
|
||||
size_t m_blockSize = 32000;
|
||||
};
|
||||
|
||||
template <typename Sink>
|
||||
static void EncodeAndWriteBlock(Sink & sink, size_t n, uint8_t const * s, BufferT & bwtBuffer)
|
||||
{
|
||||
bwtBuffer.resize(n);
|
||||
auto const start = BWT(n, s, bwtBuffer.data());
|
||||
|
||||
MoveToFront mtf;
|
||||
for (auto & b : bwtBuffer)
|
||||
b = mtf.Transform(b);
|
||||
|
||||
WriteVarUint(sink, start);
|
||||
|
||||
HuffmanCoder huffman;
|
||||
huffman.Init(bwtBuffer.begin(), bwtBuffer.end());
|
||||
huffman.WriteEncoding(sink);
|
||||
huffman.EncodeAndWrite(sink, bwtBuffer.begin(), bwtBuffer.end());
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
static void EncodeAndWriteBlock(Sink & sink, size_t n, uint8_t const * s)
|
||||
{
|
||||
BufferT bwtBuffer;
|
||||
EncodeAndWriteBlock(sink, n, s, bwtBuffer);
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
static void EncodeAndWriteBlock(Sink & sink, std::string const & s)
|
||||
{
|
||||
EncodeAndWriteBlock(sink, s.size(), reinterpret_cast<uint8_t const *>(s.data()));
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
static void EncodeAndWrite(Params const & params, Sink & sink, size_t n, uint8_t const * s)
|
||||
{
|
||||
CHECK(params.m_blockSize != 0, ());
|
||||
CHECK_GREATER(n + params.m_blockSize, n, ());
|
||||
|
||||
BufferT bwtBuffer;
|
||||
|
||||
size_t const numBlocks = (n + params.m_blockSize - 1) / params.m_blockSize;
|
||||
WriteVarUint(sink, numBlocks);
|
||||
for (size_t i = 0; i < n; i += params.m_blockSize)
|
||||
{
|
||||
auto const m = std::min(n - i, params.m_blockSize);
|
||||
EncodeAndWriteBlock(sink, m, s + i, bwtBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
static void ReadAndDecodeBlock(Source & source, BufferT & bwtBuffer, BufferT & revBuffer)
|
||||
{
|
||||
auto const start = ReadVarUint<uint64_t, Source>(source);
|
||||
|
||||
HuffmanCoder huffman;
|
||||
huffman.ReadEncoding(source);
|
||||
|
||||
bwtBuffer.clear();
|
||||
huffman.ReadAndDecode(source, std::back_inserter(bwtBuffer));
|
||||
|
||||
size_t const n = bwtBuffer.size();
|
||||
MoveToFront mtf;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
auto const b = mtf[bwtBuffer[i]];
|
||||
bwtBuffer[i] = b;
|
||||
mtf.Transform(b);
|
||||
}
|
||||
|
||||
if (n != 0)
|
||||
CHECK_LESS(start, n, ());
|
||||
|
||||
revBuffer.resize(n);
|
||||
RevBWT(n, static_cast<size_t>(start), bwtBuffer.data(), revBuffer.data());
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
static BufferT ReadAndDecodeBlock(Source & source)
|
||||
{
|
||||
BufferT bwtBuffer, revBuffer;
|
||||
ReadAndDecodeBlock(source, bwtBuffer, revBuffer);
|
||||
return revBuffer;
|
||||
}
|
||||
|
||||
template <typename Source, typename OutIt>
|
||||
static OutIt ReadAndDecode(Source & source, OutIt it)
|
||||
{
|
||||
auto const numBlocks = ReadVarUint<uint64_t, Source>(source);
|
||||
CHECK_LESS(numBlocks, std::numeric_limits<size_t>::max(), ());
|
||||
|
||||
BufferT bwtBuffer, revBuffer;
|
||||
|
||||
for (size_t i = 0; i < static_cast<size_t>(numBlocks); ++i)
|
||||
{
|
||||
ReadAndDecodeBlock(source, bwtBuffer, revBuffer);
|
||||
std::copy(revBuffer.begin(), revBuffer.end(), it);
|
||||
}
|
||||
return it;
|
||||
}
|
||||
};
|
||||
} // namespace coding
|
||||
59
libs/coding/byte_stream.hpp
Normal file
59
libs/coding/byte_stream.hpp
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
#pragma once
|
||||
|
||||
#include "base/base.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
// Sequential byte source over a raw memory area. It only keeps a cursor: the memory is
// neither owned nor bounds-checked.
class ArrayByteSource
{
public:
  explicit ArrayByteSource(void const * p) : m_p(static_cast<uint8_t const *>(p)) {}

  // Returns the byte under the cursor and moves the cursor one byte forward.
  uint8_t ReadByte()
  {
    uint8_t const value = *m_p;
    Advance(1);
    return value;
  }

  // Copies |size| bytes from the cursor into |ptr| and moves the cursor past them.
  void Read(void * ptr, size_t size)
  {
    memcpy(ptr, m_p, size);
    Advance(size);
  }

  // Current cursor position.
  void const * Ptr() const { return PtrUint8(); }
  uint8_t const * PtrUint8() const { return m_p; }

  // Moves the cursor |size| bytes forward without reading.
  void Advance(size_t size) { m_p += size; }

private:
  uint8_t const * m_p;
};
|
||||
|
||||
// Sink that appends every written byte to the end of a container referenced by it.
template <class StorageT>
class PushBackByteSink
{
public:
  explicit PushBackByteSink(StorageT & storage) : m_Storage(storage) {}

  // Appends |size| bytes starting at |p| to the storage.
  void Write(void const * p, size_t size)
  {
    // The input is treated as a plain sequence of bytes.
    auto const * const first = static_cast<uint8_t const *>(p);
    auto const * const last = first + size;
    m_Storage.insert(m_Storage.end(), first, last);
  }

  // Current size of the storage.
  size_t Pos() const { return m_Storage.size(); }

private:
  StorageT & m_Storage;
};
|
||||
|
||||
// Sink that discards the data and only accumulates the number of bytes written to it.
class CountingSink
{
public:
  CountingSink() = default;

  // Adds |size| to the counter; the data pointer is ignored.
  void Write(void const *, size_t size) { m_Count += size; }

  // Total number of bytes written so far.
  size_t GetCount() const { return m_Count; }

private:
  size_t m_Count = 0;
};
|
||||
60
libs/coding/coding_tests/CMakeLists.txt
Normal file
60
libs/coding/coding_tests/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
project(coding_tests)
|
||||
|
||||
set(SRC
|
||||
base64_test.cpp
|
||||
bit_streams_test.cpp
|
||||
bwt_coder_tests.cpp
|
||||
bwt_tests.cpp
|
||||
compressed_bit_vector_test.cpp
|
||||
csv_reader_test.cpp
|
||||
dd_vector_test.cpp
|
||||
diff_test.cpp
|
||||
elias_coder_test.cpp
|
||||
endianness_test.cpp
|
||||
file_data_test.cpp
|
||||
file_sort_test.cpp
|
||||
files_container_tests.cpp
|
||||
fixed_bits_ddvector_test.cpp
|
||||
geometry_coding_test.cpp
|
||||
hex_test.cpp
|
||||
huffman_test.cpp
|
||||
map_uint32_to_val_tests.cpp
|
||||
mem_file_reader_test.cpp
|
||||
mem_file_writer_test.cpp
|
||||
move_to_front_tests.cpp
|
||||
png_decoder_test.cpp
|
||||
point_coding_tests.cpp
|
||||
reader_cache_test.cpp
|
||||
reader_test.cpp
|
||||
reader_test.hpp
|
||||
reader_writer_ops_test.cpp
|
||||
serdes_json_test.cpp
|
||||
simple_dense_coding_test.cpp
|
||||
sha1_test.cpp
|
||||
sparse_vector_tests.cpp
|
||||
string_utf8_multilang_tests.cpp
|
||||
succinct_ef_test.cpp
|
||||
succinct_mapper_test.cpp
|
||||
test_polylines.cpp
|
||||
test_polylines.hpp
|
||||
text_storage_tests.cpp
|
||||
traffic_test.cpp
|
||||
url_tests.cpp
|
||||
value_opt_string_test.cpp
|
||||
var_record_reader_test.cpp
|
||||
var_serial_vector_test.cpp
|
||||
varint_test.cpp
|
||||
writer_test.cpp
|
||||
xml_parser_tests.cpp
|
||||
zip_creator_test.cpp
|
||||
zip_reader_test.cpp
|
||||
zlib_test.cpp
|
||||
)
|
||||
|
||||
omim_add_test(${PROJECT_NAME} ${SRC})
|
||||
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
platform_tests_support # For csv_reader_test (TODO: Move ScopedFile into a base header)
|
||||
geometry # For traffic_test and serdes_json_test (TODO: Remove dependency)
|
||||
coding
|
||||
)
|
||||
28
libs/coding/coding_tests/base64_test.cpp
Normal file
28
libs/coding/coding_tests/base64_test.cpp
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/base64.hpp"
|
||||
|
||||
using namespace base64;
|
||||
|
||||
// Checks Encode and Decode against known plain/encoded pairs, including inputs whose
// encoded form ends with zero, one and two '=' characters.
UNIT_TEST(Base64_Smoke)
|
||||
{
|
||||
// bytes[i] is the plain text for encoded[i].
char const * bytes[] = {"H", "He", "Hel", "Hell", "Hello", "Hello,", "Hello, ", "Hello, World!"};
|
||||
char const * encoded[] = {
|
||||
"SA==", "SGU=", "SGVs", "SGVsbA==", "SGVsbG8=", "SGVsbG8s", "SGVsbG8sIA==", "SGVsbG8sIFdvcmxkIQ=="};
|
||||
|
||||
TEST_EQUAL(ARRAY_SIZE(bytes), ARRAY_SIZE(encoded), ());
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(bytes); ++i)
|
||||
{
|
||||
// Each direction on its own, then both round trips.
TEST_EQUAL(Encode(bytes[i]), encoded[i], ());
|
||||
TEST_EQUAL(Decode(encoded[i]), bytes[i], ());
|
||||
TEST_EQUAL(Decode(Encode(bytes[i])), bytes[i], ());
|
||||
TEST_EQUAL(Encode(Decode(encoded[i])), encoded[i], ());
|
||||
}
|
||||
|
||||
// A longer sentence.
char const * str = "MapsWithMe is the offline maps application for any device in the world.";
|
||||
char const * encStr =
|
||||
"TWFwc1dpdGhNZSBpcyB0aGUgb2ZmbGluZSBtYXBzIGFwcGxpY2F0aW9uIGZvciBhbnkgZGV2aWNlIGluIHRoZSB3b3JsZC4=";
|
||||
TEST_EQUAL(Encode(str), encStr, ());
|
||||
TEST_EQUAL(Decode(encStr), str, ());
|
||||
}
|
||||
105
libs/coding/coding_tests/bit_streams_test.cpp
Normal file
105
libs/coding/coding_tests/bit_streams_test.cpp
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/bit_streams.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/bits.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Writes 200 pseudo-random (value, bit width) pairs through BitWriter and verifies
// that BitReader returns the same values in the same order.
UNIT_TEST(BitStreams_Smoke)
{
  uniform_int_distribution<uint32_t> byteDist(0, 255);
  mt19937 rng(0);  // Fixed seed keeps the sequence reproducible.

  vector<pair<uint8_t, uint32_t>> samples;

  // First 100 samples: widths of 0..7 bits, with the value shifted down by (CHAR_BIT - width).
  for (size_t n = 0; n < 100; ++n)
  {
    // The width is drawn before the value; the draw order defines the sequence.
    uint32_t width = byteDist(rng) % 8;
    uint8_t value = static_cast<uint8_t>(byteDist(rng) >> (CHAR_BIT - width));
    samples.push_back(make_pair(value, width));
  }

  // Next 100 samples: full 8-bit values.
  for (size_t n = 0; n < 100; ++n)
  {
    uint32_t width = 8;
    uint8_t value = static_cast<uint8_t>(byteDist(rng));
    samples.push_back(make_pair(value, width));
  }

  vector<uint8_t> encoded;
  {
    // Scoped so both writers are destroyed before the buffer is read back.
    MemWriter<vector<uint8_t>> memWriter(encoded);
    BitWriter<MemWriter<vector<uint8_t>>> bitWriter(memWriter);
    for (auto const & sample : samples)
      bitWriter.Write(sample.first, sample.second);
  }

  MemReader memReader(encoded.data(), encoded.size());
  ReaderSource<MemReader> source(memReader);
  BitReader<ReaderSource<MemReader>> bitReader(source);
  for (size_t idx = 0; idx < samples.size(); ++idx)
  {
    uint8_t decoded = bitReader.Read(samples[idx].second);
    TEST_EQUAL(decoded, samples[idx].first, (idx));
  }
}
|
||||
|
||||
// Writes three 3-bit values (9 bits in total) and expects the output buffer to hold 2 bytes.
UNIT_TEST(BitStreams_T1)
{
  vector<uint8_t> out;
  {
    // Scoped so the writers are destroyed before the size check below.
    MemWriter<vector<uint8_t>> memWriter(out);
    BitWriter<MemWriter<vector<uint8_t>>> bitWriter(memWriter);

    bitWriter.Write(0, 3);
    bitWriter.Write(3, 3);
    bitWriter.Write(6, 3);
  }

  TEST_EQUAL(out.size(), 2, ());
}
|
||||
|
||||
// For every width from 0 to 64 bits writes the low bits of a fixed pattern, then reads
// them back and compares against the pattern masked to that width.
UNIT_TEST(BitStreams_Large)
{
  using Buffer = vector<uint8_t>;
  using Sink = MemWriter<Buffer>;

  uint64_t const kMask = 0x0123456789abcdef;

  Buffer storage;
  {
    Sink sink(storage);
    BitWriter<Sink> writer(sink);

    for (int width = 0; width <= 64; ++width)
    {
      // Widths up to 32 go through the 32-bit entry point, wider ones through the 64-bit one.
      if (width > 32)
        writer.WriteAtMost64Bits(kMask, width);
      else
        writer.WriteAtMost32Bits(static_cast<uint32_t>(kMask), width);
    }
  }

  {
    MemReader memReader(storage.data(), storage.size());
    ReaderSource<MemReader> source(memReader);
    BitReader<ReaderSource<MemReader>> reader(source);
    for (int width = 0; width <= 64; ++width)
    {
      uint64_t const widthMask = bits::GetFullMask(width);

      uint64_t actual;
      if (width <= 32)
        actual = reader.ReadAtMost32Bits(width);
      else
        actual = reader.ReadAtMost64Bits(width);

      TEST_EQUAL(actual, kMask & widthMask, (width));
    }
  }
}
|
||||
} // namespace
|
||||
107
libs/coding/coding_tests/bwt_coder_tests.cpp
Normal file
107
libs/coding/coding_tests/bwt_coder_tests.cpp
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/bwt_coder.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
using namespace coding;
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Encodes |s| with BWTCoder::EncodeAndWrite using |params| into an in-memory buffer,
// decodes that buffer with BWTCoder::ReadAndDecode and returns the decoded bytes.
string EncodeDecode(BWTCoder::Params const & params, string const & s)
{
  vector<uint8_t> encoded;
  {
    // Scoped so the writer is destroyed before the buffer is read back.
    MemWriter<vector<uint8_t>> writer(encoded);
    auto const * bytes = reinterpret_cast<uint8_t const *>(s.data());
    BWTCoder::EncodeAndWrite(params, writer, s.size(), bytes);
  }

  MemReader reader(encoded.data(), encoded.size());
  ReaderSource<MemReader> source(reader);

  string decoded;
  BWTCoder::ReadAndDecode(source, back_inserter(decoded));
  return decoded;
}
|
||||
|
||||
// Encodes |s| as a single block with BWTCoder::EncodeAndWriteBlock, decodes it with
// BWTCoder::ReadAndDecodeBlock and returns the decoded bytes as a string.
string EncodeDecodeBlock(string const & s)
{
  vector<uint8_t> encoded;
  {
    // Scoped so the writer is destroyed before the buffer is read back.
    MemWriter<vector<uint8_t>> writer(encoded);
    auto const * bytes = reinterpret_cast<uint8_t const *>(s.data());
    BWTCoder::EncodeAndWriteBlock(writer, s.size(), bytes);
  }

  MemReader reader(encoded.data(), encoded.size());
  ReaderSource<MemReader> source(reader);

  auto const block = BWTCoder::ReadAndDecodeBlock(source);
  return string(block.begin(), block.end());
}
|
||||
|
||||
// Round-trips short strings through the block coder and through the full coder,
// the latter with every block size from 1 to 99 and with default parameters.
UNIT_TEST(BWTEncoder_Smoke)
{
  string const sample = "abracadabra";
  for (size_t blockSize = 1; blockSize < 100; ++blockSize)
  {
    BWTCoder::Params params;
    params.m_blockSize = blockSize;

    TEST_EQUAL(sample, EncodeDecodeBlock(sample), ());
    TEST_EQUAL(sample, EncodeDecode(params, sample), (blockSize));
  }

  // Includes the empty string.
  string const inputs[] = {"", "mississippi", "again and again and again"};
  for (string const & input : inputs)
  {
    TEST_EQUAL(input, EncodeDecodeBlock(input), ());
    TEST_EQUAL(input, EncodeDecode(BWTCoder::Params{}, input), ());
  }
}
|
||||
|
||||
// Round-trips a long input ("mississippi" repeated 10000 times) with default parameters.
UNIT_TEST(BWT_Large)
{
  string const word = "mississippi";
  size_t const kRepeats = 10000;

  string text;
  text.reserve(word.size() * kRepeats);
  for (size_t n = 0; n != kRepeats; ++n)
    text += word;

  TEST_EQUAL(text, EncodeDecode(BWTCoder::Params{}, text), ());
}
|
||||
|
||||
// Builds a string in which every byte value 0..255 occurs a pseudo-random number of
// times (1..1000), shuffles it and round-trips it through the coder.
UNIT_TEST(BWT_AllBytes)
{
  int const kSeed = 42;
  int const kMin = 1;
  int const kMax = 1000;

  mt19937 engine(kSeed);
  uniform_int_distribution<int> repeatDist(kMin, kMax);

  string text;
  for (size_t byte = 0; byte < 256; ++byte)
  {
    auto const repeats = repeatDist(engine);
    ASSERT_GREATER_OR_EQUAL(repeats, kMin, ());
    ASSERT_LESS_OR_EQUAL(repeats, kMax, ());

    // Append |repeats| copies of the current byte value.
    text.append(static_cast<size_t>(repeats), static_cast<char>(static_cast<uint8_t>(byte)));
  }

  // The same engine drives the shuffle, so the input is fully determined by kSeed.
  shuffle(text.begin(), text.end(), engine);
  TEST_EQUAL(text, EncodeDecode(BWTCoder::Params{}, text), ());
}
|
||||
} // namespace
|
||||
90
libs/coding/coding_tests/bwt_tests.cpp
Normal file
90
libs/coding/coding_tests/bwt_tests.cpp
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/bwt.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
using namespace coding;
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Applies BWT to |s| and then RevBWT to the result; returns what RevBWT produced.
string RevRevBWT(string const & s)
{
  string transformed;
  auto const start = BWT(s, transformed);

  string restored;
  RevBWT(start, transformed, restored);
  return restored;
}
|
||||
|
||||
// Checks BWT on empty inputs and on two strings with known transforms and start indices.
UNIT_TEST(BWT_Smoke)
{
  // Empty input through the pointer-based overload.
  {
    TEST_EQUAL(BWT(0 /* n */, nullptr /* s */, nullptr /* r */), 0, ());
  }

  // Empty input through the string-based overload.
  {
    string out;
    TEST_EQUAL(BWT(string() /* s */, out /* r */), 0, ());
  }

  // A run of one repeated character is expected to come back unchanged.
  {
    string const input = "aaaaaa";
    string out;
    TEST_EQUAL(BWT(input, out), 5, ());
    TEST_EQUAL(out, input, ());
  }

  {
    string const input = "mississippi";
    string out;
    TEST_EQUAL(BWT(input, out), 4, ());
    TEST_EQUAL(out, "pssmipissii", ());
  }
}
|
||||
|
||||
// Verifies that BWT followed by RevBWT restores the input for fixed strings and for
// runs of a single character of every length from 0 to 99.
UNIT_TEST(RevBWT_Smoke)
{
  string const samples[] = {"abaaba", "mississippi", "a b b", "Again and again and again"};
  for (string const & sample : samples)
    TEST_EQUAL(sample, RevRevBWT(sample), ());

  // Runs of zero bytes.
  for (size_t len = 0; len < 100; ++len)
  {
    string const zeros(len, '\0');
    TEST_EQUAL(zeros, RevRevBWT(zeros), ());
  }

  // Runs of 'a', 'b' or 'c', chosen by the length modulo 3.
  for (size_t len = 0; len < 100; ++len)
  {
    char const fill = static_cast<char>('a' + (len % 3));
    string const run(len, fill);
    TEST_EQUAL(run, RevRevBWT(run), ());
  }
}
|
||||
|
||||
// Builds a string in which every byte value 0..255 occurs a pseudo-random number of
// times (1..10), shuffles it and checks that BWT followed by RevBWT restores it.
UNIT_TEST(RevBWT_AllBytes)
{
  int const kSeed = 42;
  int const kMin = 1;
  int const kMax = 10;

  mt19937 engine(kSeed);
  uniform_int_distribution<int> repeatDist(kMin, kMax);

  string text;
  for (size_t byte = 0; byte < 256; ++byte)
  {
    auto const repeats = repeatDist(engine);
    ASSERT_GREATER_OR_EQUAL(repeats, kMin, ());
    ASSERT_LESS_OR_EQUAL(repeats, kMax, ());

    // Append |repeats| copies of the current byte value.
    text.append(static_cast<size_t>(repeats), static_cast<char>(static_cast<uint8_t>(byte)));
  }

  // The same engine drives the shuffle, so the input is fully determined by kSeed.
  shuffle(text.begin(), text.end(), engine);
  TEST_EQUAL(text, RevRevBWT(text), ());
}
|
||||
} // namespace
|
||||
455
libs/coding/coding_tests/compressed_bit_vector_test.cpp
Normal file
455
libs/coding/coding_tests/compressed_bit_vector_test.cpp
Normal file
|
|
@ -0,0 +1,455 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/compressed_bit_vector.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Reference implementation of set intersection on bit positions.
// Sorts both inputs in place and appends their intersection to |result|.
void Intersect(std::vector<uint64_t> & setBits1, std::vector<uint64_t> & setBits2, std::vector<uint64_t> & result)
{
  auto const sortInPlace = [](std::vector<uint64_t> & bits) { std::sort(bits.begin(), bits.end()); };
  sortInPlace(setBits1);
  sortInPlace(setBits2);

  // std::set_intersection requires both ranges to be sorted.
  auto out = std::back_inserter(result);
  std::set_intersection(setBits1.begin(), setBits1.end(), setBits2.begin(), setBits2.end(), out);
}
|
||||
|
||||
// Reference implementation of set difference on bit positions.
// Sorts both inputs in place and appends (setBits1 minus setBits2) to |result|.
void Subtract(std::vector<uint64_t> & setBits1, std::vector<uint64_t> & setBits2, std::vector<uint64_t> & result)
{
  auto const sortInPlace = [](std::vector<uint64_t> & bits) { std::sort(bits.begin(), bits.end()); };
  sortInPlace(setBits1);
  sortInPlace(setBits2);

  // std::set_difference requires both ranges to be sorted.
  auto out = std::back_inserter(result);
  std::set_difference(setBits1.begin(), setBits1.end(), setBits2.begin(), setBits2.end(), out);
}
|
||||
|
||||
// Reference implementation of set union on bit positions.
// Sorts both inputs in place and appends their union to |result|.
void Union(std::vector<uint64_t> & setBits1, std::vector<uint64_t> & setBits2, std::vector<uint64_t> & result)
{
  auto const sortInPlace = [](std::vector<uint64_t> & bits) { std::sort(bits.begin(), bits.end()); };
  sortInPlace(setBits1);
  sortInPlace(setBits2);

  // std::set_union requires both ranges to be sorted.
  auto out = std::back_inserter(result);
  std::set_union(setBits1.begin(), setBits1.end(), setBits2.begin(), setBits2.end(), out);
}
|
||||
|
||||
template <typename TBinaryOp>
|
||||
void CheckBinaryOp(TBinaryOp op, vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
|
||||
coding::CompressedBitVector const & cbv)
|
||||
{
|
||||
vector<uint64_t> expected;
|
||||
op(setBits1, setBits2, expected);
|
||||
TEST_EQUAL(expected.size(), cbv.PopCount(), ());
|
||||
|
||||
for (size_t i = 0; i < expected.size(); ++i)
|
||||
TEST(cbv.GetBit(expected[i]), ());
|
||||
}
|
||||
|
||||
// Checks that |cbv| matches the reference intersection of the two position lists.
void CheckIntersection(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
                       coding::CompressedBitVector const & cbv)
{
  auto const reference = &Intersect;
  CheckBinaryOp(reference, setBits1, setBits2, cbv);
}
|
||||
|
||||
// Checks that |cbv| matches the reference difference (setBits1 minus setBits2).
void CheckSubtraction(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
                      coding::CompressedBitVector const & cbv)
{
  auto const reference = &Subtract;
  CheckBinaryOp(reference, setBits1, setBits2, cbv);
}
|
||||
|
||||
// Checks that |cbv| matches the reference union of the two position lists.
void CheckUnion(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
                coding::CompressedBitVector const & cbv)
{
  // The cast selects the reference function named Union by its exact signature.
  auto const reference =
      static_cast<void (*)(vector<uint64_t> &, vector<uint64_t> &, vector<uint64_t> &)>(&Union);
  CheckBinaryOp(reference, setBits1, setBits2, cbv);
}
|
||||
|
||||
// Builds a bit vector from each position list, checks that each uses the expected
// storage strategy, unions them and checks the union's strategy and contents.
void CheckUnion(vector<uint64_t> & setBits1, coding::CompressedBitVector::StorageStrategy strategy1,
                vector<uint64_t> & setBits2, coding::CompressedBitVector::StorageStrategy strategy2,
                coding::CompressedBitVector::StorageStrategy resultStrategy)
{
  auto lhs = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto rhs = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(lhs.get(), ());
  TEST(rhs.get(), ());
  TEST_EQUAL(strategy1, lhs->GetStorageStrategy(), ());
  TEST_EQUAL(strategy2, rhs->GetStorageStrategy(), ());

  auto merged = coding::CompressedBitVector::Union(*lhs, *rhs);
  TEST(merged.get(), ());
  TEST_EQUAL(resultStrategy, merged->GetStorageStrategy(), ());

  // Compare against the reference union computed on the raw position lists.
  CheckUnion(setBits1, setBits2, *merged);
}
|
||||
} // namespace
|
||||
|
||||
// Intersects positions [1, 99] with positions [0, 98]; the result is expected to use
// dense storage and to match the reference intersection.
UNIT_TEST(CompressedBitVector_Intersect1)
{
  size_t const kNumBits = 100;
  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i > 0)
      setBits1.push_back(i);
    if (i + 1 < kNumBits)
      setBits2.push_back(i);
  }
  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  // Check the result before dereferencing it, as the Subtract tests do.
  TEST(cbv3.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, *cbv3);
}
|
||||
|
||||
// Intersects positions [0, 50] with positions [50, 99]; the inputs overlap only at
// position 50, and the result is expected to use sparse storage.
UNIT_TEST(CompressedBitVector_Intersect2)
{
  size_t const kNumBits = 100;
  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i <= kNumBits / 2)
      setBits1.push_back(i);
    if (i >= kNumBits / 2)
      setBits2.push_back(i);
  }
  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  // Check the result before dereferencing it, as the Subtract tests do.
  TEST(cbv3.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, *cbv3);
}
|
||||
|
||||
// Intersects the multiples of 2 with the multiples of 3 below 100; the result is
// expected to use sparse storage and to match the reference intersection.
UNIT_TEST(CompressedBitVector_Intersect3)
{
  size_t const kNumBits = 100;
  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i % 2 == 0)
      setBits1.push_back(i);
    if (i % 3 == 0)
      setBits2.push_back(i);
  }
  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  // Check the result before dereferencing it, as the Subtract tests do.
  TEST(cbv3.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, *cbv3);
}
|
||||
|
||||
// Intersects the multiples of 100 with the multiples of 150 below 1000; the result is
// expected to use sparse storage and to match the reference intersection.
UNIT_TEST(CompressedBitVector_Intersect4)
{
  size_t const kNumBits = 1000;
  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i % 100 == 0)
      setBits1.push_back(i);
    if (i % 150 == 0)
      setBits2.push_back(i);
  }
  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  // Check the result before dereferencing it, as the Subtract tests do.
  TEST(cbv3.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, *cbv3);
}
|
||||
|
||||
// Dense minus dense: {0..6} minus {1..7}; the result is expected to use dense storage.
UNIT_TEST(CompressedBitVector_Subtract1)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1 = {0, 1, 2, 3, 4, 5, 6};
  vector<uint64_t> setBits2 = {1, 2, 3, 4, 5, 6, 7};

  auto minuend = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto subtrahend = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(minuend.get(), ());
  TEST(subtrahend.get(), ());
  TEST_EQUAL(Strategy::Dense, minuend->GetStorageStrategy(), ());
  TEST_EQUAL(Strategy::Dense, subtrahend->GetStorageStrategy(), ());

  auto difference = coding::CompressedBitVector::Subtract(*minuend, *subtrahend);
  TEST(difference.get(), ());
  TEST_EQUAL(Strategy::Dense, difference->GetStorageStrategy(), ());
  CheckSubtraction(setBits1, setBits2, *difference);
}
|
||||
|
||||
// Dense minus sparse: {0..99} minus {9, 14}; the result is expected to use dense storage.
UNIT_TEST(CompressedBitVector_Subtract2)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1;
  for (size_t pos = 0; pos < 100; ++pos)
    setBits1.push_back(pos);

  vector<uint64_t> setBits2 = {9, 14};

  auto minuend = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto subtrahend = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(minuend.get(), ());
  TEST(subtrahend.get(), ());
  TEST_EQUAL(Strategy::Dense, minuend->GetStorageStrategy(), ());
  TEST_EQUAL(Strategy::Sparse, subtrahend->GetStorageStrategy(), ());

  auto difference = coding::CompressedBitVector::Subtract(*minuend, *subtrahend);
  TEST(difference.get(), ());
  TEST_EQUAL(Strategy::Dense, difference->GetStorageStrategy(), ());
  CheckSubtraction(setBits1, setBits2, *difference);
}
|
||||
|
||||
// Sparse minus dense: {0, 9} minus {0..9}; the result is expected to use sparse storage.
UNIT_TEST(CompressedBitVector_Subtract3)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1 = {0, 9};
  vector<uint64_t> setBits2 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  auto minuend = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto subtrahend = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(minuend.get(), ());
  TEST(subtrahend.get(), ());
  TEST_EQUAL(Strategy::Sparse, minuend->GetStorageStrategy(), ());
  TEST_EQUAL(Strategy::Dense, subtrahend->GetStorageStrategy(), ());

  auto difference = coding::CompressedBitVector::Subtract(*minuend, *subtrahend);
  TEST(difference.get(), ());
  TEST_EQUAL(Strategy::Sparse, difference->GetStorageStrategy(), ());
  CheckSubtraction(setBits1, setBits2, *difference);
}
|
||||
|
||||
// Sparse minus sparse: {0, 5, 15} minus {0, 10}; the result is expected to use sparse storage.
UNIT_TEST(CompressedBitVector_Subtract4)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1 = {0, 5, 15};
  vector<uint64_t> setBits2 = {0, 10};

  auto minuend = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto subtrahend = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(minuend.get(), ());
  TEST(subtrahend.get(), ());
  TEST_EQUAL(Strategy::Sparse, minuend->GetStorageStrategy(), ());
  TEST_EQUAL(Strategy::Sparse, subtrahend->GetStorageStrategy(), ());

  auto difference = coding::CompressedBitVector::Subtract(*minuend, *subtrahend);
  TEST(difference.get(), ());
  TEST_EQUAL(Strategy::Sparse, difference->GetStorageStrategy(), ());
  CheckSubtraction(setBits1, setBits2, *difference);
}
|
||||
|
||||
// Union of two empty sets: both inputs and the result are expected to be sparse.
UNIT_TEST(CompressedBitVector_Union_Smoke)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;

  CheckUnion(setBits1, Strategy::Sparse /* strategy1 */, setBits2, Strategy::Sparse /* strategy2 */,
             Strategy::Sparse /* resultStrategy */);
}
|
||||
|
||||
// Union of an empty (sparse) set with the dense set {0..9}; a dense result is expected.
UNIT_TEST(CompressedBitVector_Union1)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  CheckUnion(setBits1, Strategy::Sparse /* strategy1 */, setBits2, Strategy::Dense /* strategy2 */,
             Strategy::Dense /* resultStrategy */);
}
|
||||
|
||||
// Union of two sparse sets with widely spaced positions; a sparse result is expected.
UNIT_TEST(CompressedBitVector_Union2)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1 = {256, 1024};
  vector<uint64_t> setBits2 = {0, 32, 64};

  CheckUnion(setBits1, Strategy::Sparse /* strategy1 */, setBits2, Strategy::Sparse /* strategy2 */,
             Strategy::Sparse /* resultStrategy */);
}
|
||||
|
||||
// Union of two dense sets, {0..6} and {0..255}; a dense result is expected.
UNIT_TEST(CompressedBitVector_Union3)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1 = {0, 1, 2, 3, 4, 5, 6};

  vector<uint64_t> setBits2;
  for (int pos = 0; pos != 256; ++pos)
    setBits2.push_back(pos);

  CheckUnion(setBits1, Strategy::Dense /* strategy1 */, setBits2, Strategy::Dense /* strategy2 */,
             Strategy::Dense /* resultStrategy */);
}
|
||||
|
||||
// Union of one fully set dense block with a single very distant position; the test
// expects the result to use sparse storage.
UNIT_TEST(CompressedBitVector_Union4)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1;
  for (uint64_t pos = 0; pos < coding::DenseCBV::kBlockSize; ++pos)
    setBits1.push_back(pos);

  vector<uint64_t> setBits2 = {1000000000};

  CheckUnion(setBits1, Strategy::Dense /* strategy1 */, setBits2, Strategy::Sparse /* strategy2 */,
             Strategy::Sparse /* resultStrategy */);
}
|
||||
|
||||
// Serializes a dense bit vector (positions 0..99) into memory, deserializes it and
// checks that the strategy, population count and every set bit survive the round trip.
UNIT_TEST(CompressedBitVector_SerializationDense)
{
  // size_t matches the loop index type and the sibling Intersect tests.
  size_t const kNumBits = 100;
  vector<uint64_t> setBits;
  for (size_t i = 0; i < kNumBits; ++i)
    setBits.push_back(i);

  vector<uint8_t> buf;
  {
    // Scoped so the writer is destroyed before the buffer is read back.
    MemWriter<vector<uint8_t>> writer(buf);
    auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits);
    // Check the freshly built vector before dereferencing it.
    TEST(cbv.get(), ());
    TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
    TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv->GetStorageStrategy(), ());
    cbv->Serialize(writer);
  }

  MemReader reader(buf.data(), buf.size());
  auto cbv = coding::CompressedBitVectorBuilder::DeserializeFromReader(reader);
  TEST(cbv.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv->GetStorageStrategy(), ());
  TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
  for (size_t i = 0; i < setBits.size(); ++i)
    TEST(cbv->GetBit(setBits[i]), ());
}
|
||||
|
||||
// Serializes a sparse bit vector (multiples of 10 below 100) into memory, deserializes it
// and checks that the strategy, population count and every set bit survive the round trip.
UNIT_TEST(CompressedBitVector_SerializationSparse)
{
  // size_t matches the loop index type and the sibling Intersect tests.
  size_t const kNumBits = 100;
  vector<uint64_t> setBits;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i % 10 == 0)
      setBits.push_back(i);
  }

  vector<uint8_t> buf;
  {
    // Scoped so the writer is destroyed before the buffer is read back.
    MemWriter<vector<uint8_t>> writer(buf);
    auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits);
    // Check the freshly built vector before dereferencing it.
    TEST(cbv.get(), ());
    TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv->GetStorageStrategy(), ());
    cbv->Serialize(writer);
  }

  MemReader reader(buf.data(), buf.size());
  auto cbv = coding::CompressedBitVectorBuilder::DeserializeFromReader(reader);
  TEST(cbv.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv->GetStorageStrategy(), ());
  TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
  for (size_t i = 0; i < setBits.size(); ++i)
    TEST(cbv->GetBit(setBits[i]), ());
}
|
||||
|
||||
// Enumerates a dense vector (positions 0..149) and a sparse one (multiples of 15) with
// CompressedBitVectorEnumerator::ForEach and checks the positions that are reported.
UNIT_TEST(CompressedBitVector_ForEach)
{
  // size_t avoids comparing a signed constant with the unsigned sizes and positions below.
  size_t const kNumBits = 150;
  vector<uint64_t> denseBits;
  vector<uint64_t> sparseBits;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    denseBits.push_back(i);
    if (i % 15 == 0)
      sparseBits.push_back(i);
  }

  auto denseCBV = coding::CompressedBitVectorBuilder::FromBitPositions(denseBits);
  auto sparseCBV = coding::CompressedBitVectorBuilder::FromBitPositions(sparseBits);
  // Check both vectors before dereferencing them.
  TEST(denseCBV.get(), ());
  TEST(sparseCBV.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, denseCBV->GetStorageStrategy(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, sparseCBV->GetStorageStrategy(), ());

  // Collect the visited positions; a set makes the size check count distinct positions.
  set<uint64_t> denseSet;
  uint64_t maxPos = 0;
  coding::CompressedBitVectorEnumerator::ForEach(*denseCBV, [&](uint64_t pos)
  {
    denseSet.insert(pos);
    maxPos = max(maxPos, pos);
  });
  TEST_EQUAL(denseSet.size(), kNumBits, ());
  TEST_EQUAL(maxPos, kNumBits - 1, ());

  coding::CompressedBitVectorEnumerator::ForEach(*sparseCBV, [](uint64_t pos) { TEST_EQUAL(pos % 15, 0, ()); });
}
|
||||
|
||||
// Builds a DenseCBV directly from the single position 0 and checks that its population
// count is 1 and that enumeration reports only position 0.
UNIT_TEST(CompressedBitVector_DenseOneBit)
{
  vector<uint64_t> const positions = {0};
  auto const dense = make_unique<coding::DenseCBV>(positions);
  TEST_EQUAL(dense->PopCount(), 1, ());

  auto const expectZero = [](uint64_t pos) { TEST_EQUAL(pos, 0, ()); };
  coding::CompressedBitVectorEnumerator::ForEach(*dense, expectZero);
}
|
||||
|
||||
// LeaveFirstSetNBits on an empty vector must keep it empty, whatever limit is passed.
UNIT_TEST(CompressedBitVector_LeaveFirstNBitsSmoke)
{
  auto bits = coding::CompressedBitVectorBuilder::FromBitPositions(vector<uint64_t>{});
  TEST_EQUAL(bits->PopCount(), 0, ());

  for (uint64_t const limit : {0, 200})
  {
    bits = bits->LeaveFirstSetNBits(limit);
    TEST_EQUAL(bits->PopCount(), 0, ());
  }
}
|
||||
|
||||
// Exercises LeaveFirstSetNBits on dense vectors: truncating to zero bits, and keeping
// the first 50 of 100 even positions.
UNIT_TEST(CompressedBitVector_DenseLeaveFirstNBits)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  {
    // NOTE(review): this fills the list with kBlockSize * 4 copies of the position 1,
    // not with distinct positions - confirm this is intended.
    vector<uint64_t> positions(coding::DenseCBV::kBlockSize * 4, 1);
    auto bits = coding::CompressedBitVectorBuilder::FromBitPositions(positions);
    TEST_EQUAL(bits->PopCount(), coding::DenseCBV::kBlockSize * 4, ());
    TEST_EQUAL(bits->GetStorageStrategy(), Strategy::Dense, ());

    bits = bits->LeaveFirstSetNBits(0);
    TEST_EQUAL(bits->PopCount(), 0, ());
  }

  {
    // Even positions 0, 2, ..., 198.
    vector<uint64_t> positions;
    for (uint64_t k = 0; k < 100; ++k)
      positions.push_back(2 * k);

    auto bits = coding::CompressedBitVectorBuilder::FromBitPositions(positions);
    TEST_EQUAL(bits->PopCount(), 100, ());
    TEST_EQUAL(bits->GetStorageStrategy(), Strategy::Dense, ());

    bits = bits->LeaveFirstSetNBits(50);
    TEST_EQUAL(bits->PopCount(), 50, ());
    TEST_EQUAL(bits->GetStorageStrategy(), Strategy::Dense, ());

    // The first 50 even positions stay set; the odd positions between them stay clear.
    for (uint64_t k = 0; k < 50; ++k)
    {
      uint64_t const even = 2 * k;
      TEST(bits->GetBit(even), ());
      TEST(!bits->GetBit(even + 1), ());
    }
  }
}
|
||||
|
||||
// Exercises LeaveFirstSetNBits on a sparse vector whose set positions are the powers of
// two 1, 2, 4, ..., 512, with limits of 100 (no-op), 8 and 0.
UNIT_TEST(CompressedBitVector_SparseLeaveFirstNBits)
{
  vector<uint64_t> positions;
  for (int power = 0; power < 10; ++power)
    positions.push_back(static_cast<uint64_t>(1) << power);

  auto bits = coding::CompressedBitVectorBuilder::FromBitPositions(positions);
  TEST_EQUAL(bits->PopCount(), 10, ());
  TEST_EQUAL(bits->GetStorageStrategy(), coding::CompressedBitVector::StorageStrategy::Sparse, ());

  // True for non-zero values with exactly one bit set.
  auto const isPowerOfTwo = [](uint64_t v) { return v != 0 && (v & (v - 1)) == 0; };

  // A limit above the population count keeps every bit.
  bits = bits->LeaveFirstSetNBits(100);
  TEST_EQUAL(bits->PopCount(), 10, ());
  for (uint64_t pos = 0; pos < (1 << 10); ++pos)
  {
    if (isPowerOfTwo(pos))
    {
      TEST(bits->GetBit(pos), (pos));
    }
    else
    {
      TEST(!bits->GetBit(pos), (pos));
    }
  }

  // Keeping 8 bits leaves only the powers of two below 2^8.
  bits = bits->LeaveFirstSetNBits(8);
  TEST_EQUAL(bits->PopCount(), 8, ());
  for (uint64_t pos = 0; pos < (1 << 10); ++pos)
  {
    if (isPowerOfTwo(pos) && pos < (1 << 8))
    {
      TEST(bits->GetBit(pos), (pos));
    }
    else
    {
      TEST(!bits->GetBit(pos), (pos));
    }
  }

  // Keeping 0 bits clears everything.
  bits = bits->LeaveFirstSetNBits(0);
  TEST_EQUAL(bits->PopCount(), 0, ());
  for (uint64_t pos = 0; pos < (1 << 10); ++pos)
  {
    TEST(!bits->GetBit(pos), (pos));
  }
}
|
||||
224
libs/coding/coding_tests/csv_reader_test.cpp
Normal file
224
libs/coding/coding_tests/csv_reader_test.cpp
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/csv_reader.hpp"
|
||||
#include "coding/file_reader.hpp"
|
||||
|
||||
#include "platform/platform_tests_support/scoped_file.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace csv_reader_test
|
||||
{
|
||||
using platform::tests_support::ScopedFile;
|
||||
|
||||
using Row = coding::CSVReader::Row;
|
||||
using Rows = coding::CSVReader::Rows;
|
||||
|
||||
// CSV fixtures shared by the tests in this file.
namespace
{
// Two comma-separated rows; the last field of the second row contains a space.
std::string const kCSV1{"a,b,c,d\ne,f,g h"};
// A single line containing both commas and spaces.
std::string const kCSV2{"a,b,cd a b, c"};
// Empty content.
std::string const kCSV3{};
// Three rows of two fields, no trailing newline.
std::string const kCSV4{"1,2\n3,4\n5,6"};
// Like kCSV4, but with an empty line in the middle and a trailing newline.
std::string const kCSV5{"1,2\n3,4\n\n5,6\n"};
}  // namespace
|
||||
|
||||
// Reads kCSV1 twice: once without a header (two data rows expected) and once with a
// header (one data row expected, the first line returned by GetHeader()).
UNIT_TEST(CSVReaderSmoke)
{
  char const * const fileName = "test.csv";
  ScopedFile sf(fileName, kCSV1);

  // Without a header every line is a data row.
  {
    FileReader fileReader(sf.GetFullPath());
    coding::CSVReader csv(fileReader, false /* hasHeader */);
    auto const rows = csv.ReadAll();
    TEST_EQUAL(rows.size(), 2, ());

    Row const expectedFirst = {"a", "b", "c", "d"};
    Row const expectedSecond = {"e", "f", "g h"};
    TEST_EQUAL(rows[0], expectedFirst, ());
    TEST_EQUAL(rows[1], expectedSecond, ());
  }

  // With a header the first line is reported separately.
  {
    FileReader fileReader(sf.GetFullPath());
    coding::CSVReader csv(fileReader, true /* hasHeader */);
    auto const rows = csv.ReadAll();
    TEST_EQUAL(rows.size(), 1, ());

    Row const expectedHeader = {"a", "b", "c", "d"};
    TEST_EQUAL(csv.GetHeader(), expectedHeader, ());
  }
}
|
||||
|
||||
// Reads kCSV4 row by row with ReadRow() and checks that it keeps returning an empty
// result once the data is exhausted.
UNIT_TEST(CSVReaderReadLine)
{
  char const * const fileName = "test.csv";
  ScopedFile sf(fileName, kCSV4);
  Rows const expected = {{"1", "2"}, {"3", "4"}, {"5", "6"}};

  coding::CSVReader csv(sf.GetFullPath());
  size_t rowsRead = 0;
  while (auto const row = csv.ReadRow())
  {
    TEST_EQUAL(*row, expected[rowsRead], ());
    ++rowsRead;
  }
  TEST_EQUAL(rowsRead, expected.size(), ());

  // Further reads past the end must keep reporting "no row".
  TEST(!csv.ReadRow(), ());
  TEST(!csv.ReadRow(), ());
}
|
||||
|
||||
// Reads kCSV2 with a space as the delimiter; commas must then stay inside the fields.
UNIT_TEST(CSVReaderCustomDelimiter)
{
  char const * const fileName = "test.csv";
  ScopedFile sf(fileName, kCSV2);

  FileReader fileReader(sf.GetFullPath());
  coding::CSVReader csv(fileReader, false /* hasHeader */, ' ');
  auto const rows = csv.ReadAll();
  TEST_EQUAL(rows.size(), 1, ());

  Row const expectedRow = {"a,b,cd", "a", "b,", "c"};
  TEST_EQUAL(rows[0], expectedRow, ());
}
|
||||
|
||||
// Reading an empty file must produce no rows.
UNIT_TEST(CSVReaderEmptyFile)
{
  char const * const fileName = "test.csv";
  ScopedFile sf(fileName, kCSV3);

  FileReader fileReader(sf.GetFullPath());
  coding::CSVReader csv(fileReader);
  auto const rows = csv.ReadAll();
  TEST_EQUAL(rows.size(), 0, ());
}
|
||||
|
||||
// Reads kCSV4 through each way of constructing a CSVReader (FileReader, file path and
// std::ifstream) and expects the same rows from all of them.
UNIT_TEST(CSVReaderDifferentReaders)
{
  char const * const fileName = "test.csv";
  ScopedFile sf(fileName, kCSV4);
  Rows const expected = {{"1", "2"}, {"3", "4"}, {"5", "6"}};

  // Constructed from a FileReader.
  {
    FileReader fileReader(sf.GetFullPath());
    coding::CSVReader csv(fileReader);
    auto const rows = csv.ReadAll();
    TEST_EQUAL(rows, expected, ());
  }

  // Constructed from a file path.
  {
    coding::CSVReader csv(sf.GetFullPath());
    auto const rows = csv.ReadAll();
    TEST_EQUAL(rows, expected, ());
  }

  // Constructed from an input stream.
  {
    std::ifstream stream(sf.GetFullPath());
    coding::CSVReader csv(stream);
    auto const rows = csv.ReadAll();
    TEST_EQUAL(rows, expected, ());
  }
}
|
||||
|
||||
// Reads kCSV5 through each way of constructing a CSVReader. The empty line in the middle
// is expected to yield an empty row; the trailing newline is expected to add none.
UNIT_TEST(CSVReaderEmptyLines)
{
  char const * const fileName = "test.csv";
  ScopedFile sf(fileName, kCSV5);
  Rows const expected = {{"1", "2"}, {"3", "4"}, {}, {"5", "6"}};

  // Constructed from a FileReader.
  {
    FileReader fileReader(sf.GetFullPath());
    coding::CSVReader csv(fileReader);
    auto const rows = csv.ReadAll();
    TEST_EQUAL(rows, expected, ());
  }

  // Constructed from a file path.
  {
    coding::CSVReader csv(sf.GetFullPath());
    auto const rows = csv.ReadAll();
    TEST_EQUAL(rows, expected, ());
  }

  // Constructed from an input stream.
  {
    std::ifstream stream(sf.GetFullPath());
    coding::CSVReader csv(stream);
    auto const rows = csv.ReadAll();
    TEST_EQUAL(rows, expected, ());
  }
}
|
||||
|
||||
UNIT_TEST(CSVReaderForEachRow)
{
  char const * const kFileName = "test.csv";
  ScopedFile sf(kFileName, kCSV4);
  Rows const expected = {{"1", "2"}, {"3", "4"}, {"5", "6"}};

  FileReader fileReader(sf.GetFullPath());
  auto reader = coding::CSVReader(fileReader);

  // The callback must receive the rows in file order; |visited| counts the invocations.
  size_t visited = 0;
  reader.ForEachRow([&](auto const & row)
  {
    TEST_EQUAL(row, expected[visited], ());
    ++visited;
  });

  // Every expected row was delivered exactly once.
  TEST_EQUAL(expected.size(), visited, ());
}
|
||||
|
||||
UNIT_TEST(CSVReaderIterator)
{
  char const * const kFileName = "test.csv";
  ScopedFile sf(kFileName, kCSV4);
  Rows const expected = {{"1", "2"}, {"3", "4"}, {"5", "6"}};

  // Part 1: step through the runner's iterators by hand.
  {
    FileReader fileReader(sf.GetFullPath());
    coding::CSVRunner runner((coding::CSVReader(fileReader)));

    auto cur = runner.begin();
    TEST_EQUAL(*cur, expected[0], ());
    ++cur;
    TEST_EQUAL(*cur, expected[1], ());

    // The test pins that the copy returned by post-increment compares equal to the advanced
    // iterator while still dereferencing to the previous row.
    auto prev = cur++;
    TEST(prev == cur, ());
    TEST_EQUAL(*prev, expected[1], ());
    TEST_EQUAL(*cur, expected[2], ());

    ++cur;
    TEST(cur == runner.end(), ());
  }

  // Part 2: the runner works directly in a range-based for loop.
  {
    size_t visited = 0;
    for (auto const & row : coding::CSVRunner(coding::CSVReader(sf.GetFullPath())))
    {
      TEST_EQUAL(row, expected[visited], ());
      ++visited;
    }
    TEST_EQUAL(visited, expected.size(), ());
  }
}
|
||||
|
||||
UNIT_TEST(CSVReaderEmptyColumns)
{
  // Leading, trailing and consecutive delimiters must each produce an empty cell.
  auto const kContentWithEmptyColumns = ",,2,,4,\n,,,,,";
  char const * const kFileName = "test.csv";
  ScopedFile sf(kFileName, kContentWithEmptyColumns);
  Rows const expected = {{"", "", "2", "", "4", ""}, {"", "", "", "", "", ""}};

  coding::CSVReader reader(sf.GetFullPath());
  size_t rowsRead = 0;
  for (auto row = reader.ReadRow(); row; row = reader.ReadRow())
  {
    TEST_EQUAL(*row, expected[rowsRead], ());
    ++rowsRead;
  }
  TEST_EQUAL(rowsRead, expected.size(), ());

  // Once exhausted, the reader must keep reporting "no row" on repeated calls.
  TEST(!reader.ReadRow(), ());
  TEST(!reader.ReadRow(), ());
}
|
||||
|
||||
UNIT_TEST(CSVReaderQuotes)
{
  // One line mixing unquoted cells, quoted cells, quoted delimiters and doubled ("") quotes.
  auto const kContentWithQuotes =
      R"(noquotes, "" , "with space","with, comma","""double"" quotes","""double,"", commas", """""",)";
  char const * const kFileName = "test.csv";
  ScopedFile sf(kFileName, kContentWithQuotes);
  Rows const expected = {
      {"noquotes", "", "with space", "with, comma", "\"double\" quotes", "\"double,\", commas", "\"\"", ""}};

  coding::CSVReader reader(sf.GetFullPath());
  size_t rowsRead = 0;
  for (auto row = reader.ReadRow(); row; row = reader.ReadRow())
  {
    TEST_EQUAL(*row, expected[rowsRead], ());
    ++rowsRead;
  }
  TEST_EQUAL(rowsRead, expected.size(), ());

  // Once exhausted, the reader must keep reporting "no row" on repeated calls.
  TEST(!reader.ReadRow(), ());
  TEST(!reader.ReadRow(), ());
}
|
||||
|
||||
} // namespace csv_reader_test
|
||||
45
libs/coding/coding_tests/dd_vector_test.cpp
Normal file
45
libs/coding/coding_tests/dd_vector_test.cpp
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/dd_vector.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
UNIT_TEST(DDVector_Smoke)
{
  using Vector = DDVector<uint16_t, MemReader>;

  // The reader is handed the raw element bytes only: the assertions below expect exactly the
  // three pushed values, i.e. no separate size prefix is part of the buffer.
  std::vector<uint16_t> const data = {1, 2, 3};
  MemReader reader(reinterpret_cast<char const *>(data.data()), data.size() * sizeof(data[0]));
  Vector v(reader);

  // Size and random access.
  TEST_EQUAL(3, v.size(), ());
  TEST_EQUAL(1, v[0], ());
  TEST_EQUAL(2, v[1], ());
  TEST_EQUAL(3, v[2], ());

  // Range-based iteration must visit the same values as an explicit iterator walk.
  Vector::const_iterator it = v.begin();
  for (auto const value : v)
    TEST_EQUAL(value, *it++, ());
}
|
||||
|
||||
UNIT_TEST(DDVector_IncorrectSize)
{
  using Vector = DDVector<uint16_t, MemReader>;

  // "ab" plus its terminating NUL is 3 bytes, which is not a multiple of sizeof(uint16_t).
  char const data[] = "ab";
  MemReader reader(data, ARRAY_SIZE(data));

  // Constructing the vector over such a reader is expected to throw Vector::OpenException.
  bool exceptionCaught = false;
  try
  {
    Vector v(reader);
  }
  catch (Vector::OpenException const &)
  {
    // Caught by const reference and left unnamed: the exception object itself is not inspected.
    exceptionCaught = true;
  }

  TEST(exceptionCaught, ());
}
|
||||
278
libs/coding/coding_tests/diff_test.cpp
Normal file
278
libs/coding/coding_tests/diff_test.cpp
Normal file
|
|
@ -0,0 +1,278 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/byte_stream.hpp"
|
||||
#include "coding/dd_vector.hpp"
|
||||
#include "coding/diff.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include "base/rolling_hash.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
UNIT_TEST(MyersSimpleDiff)
{
  std::vector<char> tmp;
  PushBackByteSink<std::vector<char>> sink(tmp);

  TEST_EQUAL(4, diff::DiffMyersSimple(std::string("axxxb"), std::string("cxxxd"), 5, sink), ());

  // The same pair of strings is diffed under decreasing limits (third argument): the expected
  // result is 5 while the limit is at least 5, and -1 once the limit drops below it.
  std::string const src = "abcabba";
  std::string const dst = "cbabac";
  TEST_EQUAL(5, diff::DiffMyersSimple(src, dst, 10, sink), ());
  TEST_EQUAL(5, diff::DiffMyersSimple(src, dst, 5, sink), ());
  TEST_EQUAL(-1, diff::DiffMyersSimple(src, dst, 4, sink), ());
  TEST_EQUAL(-1, diff::DiffMyersSimple(src, dst, 2, sink), ());
  TEST_EQUAL(-1, diff::DiffMyersSimple(src, dst, 1, sink), ());
}
|
||||
|
||||
// Patch writer used by the tests: records everything it is given as text so that the produced
// patch can be compared against a literal string.
class TestPatchWriter
{
public:
  // Appends |n| consecutive elements, starting at |it|, to the recorded text.
  template <typename IterT>
  void WriteData(IterT it, uint64_t n)
  {
    for (; n != 0; --n, ++it)
      m_Stream << *it;
  }

  // Appends the operation code in decimal followed by the "." separator.
  void WriteOperation(uint64_t op)
  {
    m_Stream << op;
    m_Stream << ".";
  }

  // Returns everything written so far.
  std::string Str() { return m_Stream.str(); }

private:
  std::ostringstream m_Stream;
};
|
||||
|
||||
UNIT_TEST(PatchCoderCopyFirst)
{
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);

  // Two consecutive copies followed by an insertion.
  coder.Copy(2);
  coder.Copy(1);
  coder.Insert("ab", 2);
  coder.Finalize();

  // The expected text pins the exact stream PatchCoder hands to the writer.
  TEST_EQUAL(writer.Str(), "6.ab5.", ());
}
|
||||
|
||||
UNIT_TEST(PatchCoderInsertFirst)
{
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);

  // A patch that starts with an insertion and contains two back-to-back insertions.
  coder.Insert("abc", 3);
  coder.Copy(3);
  coder.Insert("d", 1);
  coder.Insert("e", 1);
  coder.Delete(5);
  coder.Finalize();

  // The expected text pins the exact stream PatchCoder hands to the writer.
  TEST_EQUAL(writer.Str(), "abc7.6.de5.11.", ());
}
|
||||
|
||||
UNIT_TEST(PatchCoderDeleteFirst)
{
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);

  // A patch that starts with a deletion.
  coder.Delete(3);
  coder.Copy(2);
  coder.Finalize();

  // The expected text pins the exact stream PatchCoder hands to the writer.
  TEST_EQUAL(writer.Str(), "6.5.", ());
}
|
||||
|
||||
UNIT_TEST(PatchCoderEmptyPatch)
{
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);

  // Finalizing without any operation must not write anything.
  coder.Finalize();
  TEST_EQUAL(writer.Str(), "", ());
}
|
||||
|
||||
// Mock of PatchCoder that renders operations as readable text.
// Format: "=N." for copy, "-N." for delete and "+str." for insert, where N is a number and "."
// terminates each operation.
// Operations with n == 0 are dropped, but equal adjacent operations are not merged:
// "=2.=2." stays as is and does not become "=4.".
class TestPatchCoder
{
public:
  using size_type = size_t;

  // Records a copy of |n| elements; a zero-length copy is ignored.
  void Copy(size_t n)
  {
    if (n == 0)
      return;
    m_Stream << "=" << n << ".";
  }

  // Records a deletion of |n| elements; a zero-length deletion is ignored.
  void Delete(size_t n)
  {
    if (n == 0)
      return;
    m_Stream << "-" << n << ".";
  }

  // Records an insertion of the |n| elements starting at |it|; an empty insertion is ignored.
  template <typename IterT>
  void Insert(IterT it, size_t n)
  {
    if (n != 0)
    {
      m_Stream << "+";
      for (size_t written = 0; written != n; ++written, ++it)
        m_Stream << *it;
      m_Stream << ".";
    }
  }

  // Nothing is buffered, so there is nothing to flush.
  void Finalize() {}

  // Returns the text recorded so far.
  std::string Str() { return m_Stream.str(); }

private:
  std::ostringstream m_Stream;
};
|
||||
|
||||
UNIT_TEST(DiffSimpleReplace)
{
  using CharVector = DDVector<char, MemReader>;  // Byte-wise view, since sizeof(char) == 1.

  char const src[] = "abcxxxdef";
  char const dst[] = "abcyydef";
  // The "- 1" leaves the terminating NUL of each literal out of the compared data.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  diff::SimpleReplaceDiffer differ;

  // First through the text mock: common prefix, replaced middle, common suffix.
  TestPatchCoder mockCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), mockCoder);
  TEST_EQUAL(mockCoder.Str(), "=3.-3.+yy.=3.", ());

  // Then through the real PatchCoder writing into the recording writer.
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), coder);
  coder.Finalize();
  TEST_EQUAL(writer.Str(), "6.6.yy4.6.", ());
}
|
||||
|
||||
UNIT_TEST(DiffSimpleReplaceEmptyBegin)
{
  using CharVector = DDVector<char, MemReader>;  // Byte-wise view, since sizeof(char) == 1.

  // The strings share only a suffix, so there is no leading copy operation.
  char const src[] = "xxxdef";
  char const dst[] = "yydef";
  // The "- 1" leaves the terminating NUL of each literal out of the compared data.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  diff::SimpleReplaceDiffer differ;

  // First through the text mock.
  TestPatchCoder mockCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), mockCoder);
  TEST_EQUAL(mockCoder.Str(), "-3.+yy.=3.", ());

  // Then through the real PatchCoder writing into the recording writer.
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), coder);
  coder.Finalize();
  TEST_EQUAL(writer.Str(), "6.yy4.6.", ());
}
|
||||
|
||||
UNIT_TEST(DiffSimpleReplaceEmptyEnd)
{
  using CharVector = DDVector<char, MemReader>;  // Byte-wise view, since sizeof(char) == 1.

  // The strings share only a prefix, so there is no trailing copy operation.
  char const src[] = "abcxxx";
  char const dst[] = "abcyy";
  // The "- 1" leaves the terminating NUL of each literal out of the compared data.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  diff::SimpleReplaceDiffer differ;

  // First through the text mock.
  TestPatchCoder mockCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), mockCoder);
  TEST_EQUAL(mockCoder.Str(), "=3.-3.+yy.", ());

  // Then through the real PatchCoder writing into the recording writer.
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), coder);
  coder.Finalize();
  TEST_EQUAL(writer.Str(), "6.6.yy4.", ());
}
|
||||
|
||||
UNIT_TEST(DiffSimpleReplaceAllEqual)
{
  using CharVector = DDVector<char, MemReader>;  // Byte-wise view, since sizeof(char) == 1.

  // Identical inputs: the whole diff is a single copy operation.
  char const src[] = "abcdef";
  char const dst[] = "abcdef";
  // The "- 1" leaves the terminating NUL of each literal out of the compared data.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  diff::SimpleReplaceDiffer differ;

  // First through the text mock.
  TestPatchCoder mockCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), mockCoder);
  TEST_EQUAL(mockCoder.Str(), "=6.", ());

  // Then through the real PatchCoder writing into the recording writer.
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), coder);
  coder.Finalize();
  TEST_EQUAL(writer.Str(), "12.", ());
}
|
||||
|
||||
UNIT_TEST(DiffWithRollingHashEqualStrings)
{
  using CharVector = DDVector<char, MemReader>;  // Byte-wise view, since sizeof(char) == 1.
  using Differ = diff::RollingHashDiffer<diff::SimpleReplaceDiffer, RollingHasher64>;

  char const src[] = "abcdefklmno";
  char const dst[] = "abcdefklmno";
  // The "- 1" leaves the terminating NUL of each literal out of the compared data.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  Differ differ(3);

  // Equal 11-character inputs are expected as copies of 3, 3, 3 and 2 characters.
  TestPatchCoder mockCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), mockCoder);
  TEST_EQUAL(mockCoder.Str(), "=3.=3.=3.=2.", ());
}
|
||||
|
||||
UNIT_TEST(DiffWithRollingHashCompletelyDifferentStrings)
{
  using CharVector = DDVector<char, MemReader>;  // Byte-wise view, since sizeof(char) == 1.
  using Differ = diff::RollingHashDiffer<diff::SimpleReplaceDiffer, RollingHasher64>;

  char const src[] = "pqrstuvw";
  char const dst[] = "abcdefgh";
  // The "- 1" leaves the terminating NUL of each literal out of the compared data.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  Differ differ(3);

  // Nothing in common: the whole source is deleted and the whole destination inserted.
  TestPatchCoder mockCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), mockCoder);
  TEST_EQUAL(mockCoder.Str(), "-8.+abcdefgh.", ());
}
|
||||
|
||||
UNIT_TEST(DiffWithRollingHash1)
{
  using CharVector = DDVector<char, MemReader>;  // Byte-wise view, since sizeof(char) == 1.
  using Differ = diff::RollingHashDiffer<diff::SimpleReplaceDiffer, RollingHasher64>;

  // The destination swaps "ef" to "fe" and replaces "j" with "kk".
  char const src[] = "abcdefghijklmnop";
  char const dst[] = "abcdfeghikkklmnop";
  // The "- 1" leaves the terminating NUL of each literal out of the compared data.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  Differ differ(3);

  TestPatchCoder mockCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), mockCoder);
  TEST_EQUAL(mockCoder.Str(), "=3.=1.-2.+fe.=3.-1.+kk.=2.=3.=1.", ());
}
|
||||
|
||||
UNIT_TEST(DiffWithRollingHash2)
{
  using CharVector = DDVector<char, MemReader>;  // Byte-wise view, since sizeof(char) == 1.
  using Differ = diff::RollingHashDiffer<diff::SimpleReplaceDiffer, RollingHasher64>;

  // The destination replaces "c" with "x" and drops "ghijk".
  char const src[] = "abcdefghijklmnop";
  char const dst[] = "abxdeflmnop";
  // The "- 1" leaves the terminating NUL of each literal out of the compared data.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  Differ differ(3);

  TestPatchCoder mockCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), mockCoder);
  TEST_EQUAL(mockCoder.Str(), "=2.-1.+x.=3.-5.=1.=3.=1.", ());
}
|
||||
62
libs/coding/coding_tests/elias_coder_test.cpp
Normal file
62
libs/coding/coding_tests/elias_coder_test.cpp
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/bit_streams.hpp"
|
||||
#include "coding/elias_coder.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/bits.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename TCoder>
|
||||
void TestCoder(std::string const & name)
|
||||
{
|
||||
using TBuffer = std::vector<uint8_t>;
|
||||
using TWriter = MemWriter<TBuffer>;
|
||||
|
||||
uint64_t const kMask = 0xfedcba9876543210;
|
||||
|
||||
TBuffer buf;
|
||||
{
|
||||
TWriter w(buf);
|
||||
BitWriter<TWriter> bits(w);
|
||||
for (int i = 0; i <= 64; ++i)
|
||||
{
|
||||
uint64_t const mask = bits::GetFullMask(i);
|
||||
uint64_t const value = kMask & mask;
|
||||
if (value == 0)
|
||||
TEST(!TCoder::Encode(bits, value), (name, i));
|
||||
else
|
||||
TEST(TCoder::Encode(bits, value), (name, i));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
MemReader r(buf.data(), buf.size());
|
||||
ReaderSource<MemReader> src(r);
|
||||
BitReader<ReaderSource<MemReader>> bits(src);
|
||||
for (int i = 0; i <= 64; ++i)
|
||||
{
|
||||
uint64_t const mask = bits::GetFullMask(i);
|
||||
uint64_t const expected = kMask & mask;
|
||||
if (expected == 0)
|
||||
continue;
|
||||
TEST_EQUAL(expected, TCoder::Decode(bits), (name, i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UNIT_TEST(EliasCoder_Gamma)
|
||||
{
|
||||
TestCoder<coding::GammaCoder>("Gamma");
|
||||
}
|
||||
UNIT_TEST(EliasCoder_Delta)
|
||||
{
|
||||
TestCoder<coding::DeltaCoder>("Delta");
|
||||
}
|
||||
} // namespace
|
||||
40
libs/coding/coding_tests/endianness_test.cpp
Normal file
40
libs/coding/coding_tests/endianness_test.cpp
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/endianness.hpp"
|
||||
|
||||
UNIT_TEST(Endianness1Byte)
{
  // A single byte has nothing to reorder, so every value must map to itself.
  uint8_t const kValues[] = {0, 17, 255};

  // The whole set is checked twice, mirroring the two identical groups of assertions this test
  // has always contained.
  for (int round = 0; round < 2; ++round)
  {
    for (uint8_t const value : kValues)
      TEST_EQUAL(value, ReverseByteOrder<uint8_t>(value), ());
  }
}
|
||||
|
||||
UNIT_TEST(Endianness12Bytes)
{
  // 16-bit values: the two bytes swap places.
  struct Case
  {
    uint16_t m_reversed;
    uint16_t m_input;
  };
  Case const kCases[] = {{0, 0}, {256, 1}, {0xE8FD, 0xFDE8}, {0xFFFF, 0xFFFF}};

  // The whole set is checked twice, mirroring the two identical groups of assertions this test
  // has always contained.
  for (int round = 0; round < 2; ++round)
  {
    for (Case const & c : kCases)
      TEST_EQUAL(c.m_reversed, ReverseByteOrder<uint16_t>(c.m_input), ());
  }
}
|
||||
|
||||
UNIT_TEST(Endianness18Bytes)
{
  // 64-bit values: all eight bytes are mirrored. The argument type is deduced from the value.
  struct Case
  {
    unsigned long long m_reversed;
    unsigned long long m_input;
  };
  Case const kCases[] = {{0ULL, 0ULL},
                         {1ULL, 1ULL << 56},
                         {0xE2E4D7D5B1C3B8C6ULL, 0xC6B8C3B1D5D7E4E2ULL},
                         {0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL}};

  // The whole set is checked twice, mirroring the two identical groups of assertions this test
  // has always contained.
  for (int round = 0; round < 2; ++round)
  {
    for (Case const & c : kCases)
      TEST_EQUAL(c.m_reversed, ReverseByteOrder(c.m_input), ());
  }
}
|
||||
278
libs/coding/coding_tests/file_data_test.cpp
Normal file
278
libs/coding/coding_tests/file_data_test.cpp
Normal file
|
|
@ -0,0 +1,278 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/internal/file_data.hpp"
|
||||
|
||||
#include <algorithm>
#include <cstring>  // strlen
#include <fstream>
#include <string>
#include <vector>
|
||||
|
||||
namespace file_data_test
|
||||
{
|
||||
std::string const name1 = "test1.file";
|
||||
std::string const name2 = "test2.file";
|
||||
|
||||
void MakeFile(std::string const & name)
|
||||
{
|
||||
base::FileData f(name, base::FileData::Op::WRITE_TRUNCATE);
|
||||
f.Write(name.c_str(), name.size());
|
||||
}
|
||||
|
||||
void MakeFile(std::string const & name, size_t const size, char const c)
|
||||
{
|
||||
base::FileData f(name, base::FileData::Op::WRITE_TRUNCATE);
|
||||
f.Write(std::string(size, c).c_str(), size);
|
||||
}
|
||||
|
||||
#ifdef OMIM_OS_WINDOWS
// Verifies that the file |name| still holds exactly what MakeFile(name) writes into it:
// the file name itself.
void CheckFileOK(std::string const & name)
{
  base::FileData f(name, base::FileData::Op::READ);

  uint64_t const size = f.Size();
  TEST_EQUAL(size, name.size(), ());

  std::vector<char> buffer(size);
  // data() stays valid for an empty buffer, whereas &buffer[0] would be undefined behavior.
  f.Read(0, buffer.data(), size);
  // Qualified call: does not depend on ADL finding std::equal through the iterator types.
  TEST(std::equal(name.begin(), name.end(), buffer.begin()), ());
}
#endif
|
||||
|
||||
UNIT_TEST(FileData_ApiSmoke)
{
  // MakeFile(name1) writes the file name as content, so the expected size is the name length.
  MakeFile(name1);
  uint64_t const expectedSize = name1.size();

  uint64_t sz;

  // The freshly created file is visible and has the expected size.
  TEST(base::GetFileSize(name1, sz), ());
  TEST_EQUAL(sz, expectedSize, ());

  // After the rename only the new name exists, with the same size.
  TEST(base::RenameFileX(name1, name2), ());
  TEST(!base::GetFileSize(name1, sz), ());
  TEST(base::GetFileSize(name2, sz), ());
  TEST_EQUAL(sz, expectedSize, ());

  // After the deletion the file is gone.
  TEST(base::DeleteFileX(name2), ());
  TEST(!base::GetFileSize(name2, sz), ());
}
|
||||
|
||||
/*
|
||||
UNIT_TEST(FileData_NoDiskSpace)
|
||||
{
|
||||
char const * name = "/Volumes/KINDLE/file.bin";
|
||||
vector<uint8_t> bytes(100000000);
|
||||
|
||||
try
|
||||
{
|
||||
base::FileData f(name, base::FileData::Op::WRITE_TRUNCATE);
|
||||
|
||||
for (size_t i = 0; i < 100; ++i)
|
||||
f.Write(&bytes[0], bytes.size());
|
||||
}
|
||||
catch (Writer::Exception const & ex)
|
||||
{
|
||||
LOG(LINFO, ("Writer exception catched"));
|
||||
}
|
||||
|
||||
(void)base::DeleteFileX(name);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
UNIT_TEST(FileData_SharingAV_Windows)
|
||||
{
|
||||
{
|
||||
MakeFile(name1);
|
||||
|
||||
// lock file, will check sharing access
|
||||
base::FileData f1(name1, base::FileData::Op::READ);
|
||||
|
||||
// try rename or delete locked file
|
||||
TEST(!base::RenameFileX(name1, name2), ());
|
||||
TEST(!base::DeleteFileX(name1), ());
|
||||
|
||||
MakeFile(name2);
|
||||
|
||||
// try rename or copy to locked file
|
||||
TEST(!base::RenameFileX(name2, name1), ());
|
||||
TEST(!base::CopyFileX(name2, name1), ());
|
||||
|
||||
// files should be unchanged
|
||||
CheckFileOK(name1);
|
||||
CheckFileOK(name2);
|
||||
|
||||
//TEST(base::CopyFile(name1, name2), ());
|
||||
}
|
||||
|
||||
// renaming to existing file is not allowed
|
||||
TEST(!base::RenameFileX(name1, name2), ());
|
||||
TEST(!base::RenameFileX(name2, name1), ());
|
||||
|
||||
TEST(base::DeleteFileX(name1), ());
|
||||
TEST(base::DeleteFileX(name2), ());
|
||||
}
|
||||
#endif
|
||||
*/
|
||||
|
||||
UNIT_TEST(Equal_Function_Test)
{
  // Each file contains its own name, so the two files differ in content.
  MakeFile(name1);
  MakeFile(name2);

  // A file is equal to itself ...
  TEST(base::IsEqualFiles(name1, name1), ());
  TEST(base::IsEqualFiles(name2, name2), ());
  // ... and files with different content are not equal.
  TEST(!base::IsEqualFiles(name1, name2), ());

  TEST(base::DeleteFileX(name1), ());
  TEST(base::DeleteFileX(name2), ());
}
|
||||
|
||||
UNIT_TEST(Equal_Function_Test_For_Big_Files)
{
  size_t const kMegabyte = 1024 * 1024;

  // Deletes both scratch files and checks that each deletion succeeded.
  auto const removeBoth = []
  {
    TEST(base::DeleteFileX(name1), ());
    TEST(base::DeleteFileX(name2), ());
  };

  // Case 1: same size (1 MiB), same content.
  MakeFile(name1, kMegabyte, 'a');
  MakeFile(name2, kMegabyte, 'a');
  TEST(base::IsEqualFiles(name1, name2), ());
  removeBoth();

  // Case 2: same size (1 MiB + 512 bytes), same content.
  MakeFile(name1, kMegabyte + 512, 'a');
  MakeFile(name2, kMegabyte + 512, 'a');
  TEST(base::IsEqualFiles(name1, name2), ());
  removeBoth();

  // Case 3: same size (1 MiB + 1 byte), different content.
  MakeFile(name1, kMegabyte + 1, 'a');
  MakeFile(name2, kMegabyte + 1, 'b');
  TEST(base::IsEqualFiles(name1, name1), ());
  TEST(base::IsEqualFiles(name2, name2), ());
  TEST(!base::IsEqualFiles(name1, name2), ());
  removeBoth();

  // Case 4: same size (1 MiB), different content.
  MakeFile(name1, kMegabyte, 'a');
  MakeFile(name2, kMegabyte, 'b');
  TEST(base::IsEqualFiles(name1, name1), ());
  TEST(base::IsEqualFiles(name2, name2), ());
  TEST(!base::IsEqualFiles(name1, name2), ());
  removeBoth();

  // Case 5: different sizes and different content.
  MakeFile(name1, kMegabyte, 'a');
  MakeFile(name2, kMegabyte + 1, 'b');
  TEST(!base::IsEqualFiles(name1, name2), ());
  removeBoth();
}
|
||||
|
||||
UNIT_TEST(EmptyFile)
{
  using namespace base;

  std::string const name = "test.empty";
  std::string const copy = "test.empty.copy";

  uint64_t sz;

  // Precondition: neither file exists yet.
  TEST(!GetFileSize(name, sz), ());
  TEST(!GetFileSize(copy, sz), ());

  // Copying a file that does not exist must fail ...
  TEST(!CopyFileX(name, copy), ());

  // ... and must not create either file.
  TEST(!GetFileSize(name, sz), ());
  TEST(!GetFileSize(copy, sz), ());

  // Create an empty file: open it for writing and close it without writing anything.
  {
    FileData f(name, base::FileData::Op::WRITE_TRUNCATE);
  }

  // The empty file is on disk and has zero size.
  TEST(GetFileSize(name, sz), ());
  TEST_EQUAL(sz, 0, ());

  // Copying the empty file succeeds.
  TEST(CopyFileX(name, copy), ());
  // TEST(!RenameFileX(name, copy), ());

  // Delete the copy, then rename name -> copy.
  TEST(DeleteFileX(copy), ());
  TEST(RenameFileX(name, copy), ());

  // Now the initial file is gone and only the (still empty) copy exists.
  TEST(!GetFileSize(name, sz), ());
  TEST(GetFileSize(copy, sz), ());
  TEST_EQUAL(sz, 0, ());

  // Clean up.
  TEST(DeleteFileX(copy), ());
}
|
||||
|
||||
UNIT_TEST(RenameOnExistingFile)
{
  using namespace base;

  std::string const name = "test.empty";
  std::string const copy = "test.empty.copy";

  // Creates (or truncates) |path| so that it contains the single byte |value|.
  auto const writeByte = [](std::string const & path, uint8_t const value)
  {
    FileData f(path, FileData::Op::WRITE_TRUNCATE);
    f.Write(&value, 1);
  };

  // Source holds 1, destination holds 2.
  writeByte(name, 1);
  writeByte(copy, 2);

  // Renaming onto an existing file must succeed ...
  TEST(RenameFileX(name, copy), ());

  // ... and the destination must now hold the source's content.
  {
    FileData f(copy, FileData::Op::READ);
    uint8_t x;
    f.Read(0, &x, 1);
    TEST_EQUAL(x, 1, ());
  }

  TEST(DeleteFileX(copy), ());
}
|
||||
|
||||
// Made this 'obvious' test for getline. I had (or not?) behaviour when 'while (getline)' loop
|
||||
// didn't get last string in file without trailing '\n'.
|
||||
UNIT_TEST(File_StdGetLine)
|
||||
{
|
||||
std::string const fName = "test.txt";
|
||||
|
||||
for (char const * buffer : {"x\nxy\nxyz\nxyzk", "x\nxy\nxyz\nxyzk\n"})
|
||||
{
|
||||
{
|
||||
base::FileData f(fName, base::FileData::Op::WRITE_TRUNCATE);
|
||||
f.Write(buffer, std::strlen(buffer));
|
||||
}
|
||||
|
||||
{
|
||||
std::ifstream ifs(fName);
|
||||
std::string line;
|
||||
size_t count = 0;
|
||||
while (std::getline(ifs, line))
|
||||
{
|
||||
++count;
|
||||
TEST_EQUAL(line.size(), count, ());
|
||||
}
|
||||
|
||||
TEST_EQUAL(count, 4, ());
|
||||
}
|
||||
|
||||
TEST(base::DeleteFileX(fName), ());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace file_data_test
|
||||
57
libs/coding/coding_tests/file_sort_test.cpp
Normal file
57
libs/coding/coding_tests/file_sort_test.cpp
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/file_sort.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
void TestFileSorter(vector<uint32_t> & data, char const * tmpFileName, size_t bufferSize)
|
||||
{
|
||||
vector<char> serial;
|
||||
typedef MemWriter<vector<char>> MemWriterType;
|
||||
MemWriterType writer(serial);
|
||||
typedef WriterFunctor<MemWriterType> OutT;
|
||||
OutT out(writer);
|
||||
FileSorter<uint32_t, OutT> sorter(bufferSize, tmpFileName, out);
|
||||
for (size_t i = 0; i < data.size(); ++i)
|
||||
sorter.Add(data[i]);
|
||||
sorter.SortAndFinish();
|
||||
|
||||
TEST_EQUAL(serial.size(), data.size() * sizeof(data[0]), ());
|
||||
sort(data.begin(), data.end());
|
||||
MemReader reader(&serial[0], serial.size());
|
||||
TEST_EQUAL(reader.Size(), data.size() * sizeof(data[0]), ());
|
||||
vector<uint32_t> result(data.size());
|
||||
reader.Read(0, &result[0], reader.Size());
|
||||
TEST_EQUAL(result, data, ());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
UNIT_TEST(FileSorter_Smoke)
{
  // Three unsorted values are enough to see that the output order actually changes.
  std::vector<uint32_t> data = {2, 3, 1};
  TestFileSorter(data, "file_sorter_test_smoke.tmp", 10);
}
|
||||
|
||||
UNIT_TEST(FileSorter_Random)
{
  // A fixed seed keeps the generated input reproducible between runs.
  std::mt19937 rng(0);
  std::vector<uint32_t> data(1000);
  for (size_t i = 0; i < data.size(); ++i)
  {
    // Every 100th element repeats the value generated 20 positions earlier, so the input
    // contains duplicates. The parentheses matter: `i + 1 % 100` parses as `i + (1 % 100)`,
    // which is never zero, so the duplicate branch could never be taken.
    // The first such slot is i == 99, hence i - 20 never underflows.
    bool const isDuplicateSlot = (i + 1) % 100 == 0;
    data[i] = isDuplicateSlot ? data[i - 20] : rng();
  }

  TestFileSorter(data, "file_sorter_test_random.tmp", data.size() / 10);
}
|
||||
421
libs/coding/coding_tests/files_container_tests.cpp
Normal file
421
libs/coding/coding_tests/files_container_tests.cpp
Normal file
|
|
@ -0,0 +1,421 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/files_container.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/scope_guard.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/target_os.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#ifndef OMIM_OS_WINDOWS
|
||||
#include <unistd.h> // _SC_PAGESIZE
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
UNIT_TEST(FilesContainer_Smoke)
{
  std::string const fName = "files_container.tmp";
  // Start from a clean state in case an earlier run left the file behind.
  FileWriter::DeleteFileX(fName);

  size_t const kSections = 10;

  // Step 1: create the container; the section tagged "i" receives the varints 0 .. i-1.
  {
    FilesContainerW writer(fName);
    for (size_t section = 0; section < kSections; ++section)
    {
      auto w = writer.GetWriter(strings::to_string(section));
      for (uint32_t value = 0; value < section; ++value)
        WriteVarUint(w, value);
    }
  }

  // Step 2: read every section back and check its content.
  {
    FilesContainerR reader(fName);
    for (size_t section = 0; section < kSections; ++section)
    {
      FilesContainerR::TReader r = reader.GetReader(strings::to_string(section));
      ReaderSource<FilesContainerR::TReader> src(r);
      for (uint32_t value = 0; value < section; ++value)
      {
        uint32_t const stored = ReadVarUint<uint32_t>(src);
        TEST_EQUAL(value, stored, ());
      }
    }
  }

  // Step 3: append new sections to the existing container one at a time, re-opening the
  // container for reading after each append.
  uint32_t const arrAppend[] = {888, 777, 666};
  for (uint32_t const appended : arrAppend)
  {
    {
      FilesContainerW writer(fName, FileWriter::OP_WRITE_EXISTING);
      auto w = writer.GetWriter(strings::to_string(appended));
      WriteVarUint(w, appended);
    }

    // The section that was just appended must be readable.
    {
      FilesContainerR reader(fName);
      FilesContainerR::TReader r = reader.GetReader(strings::to_string(appended));
      ReaderSource<FilesContainerR::TReader> src(r);
      uint32_t const stored = ReadVarUint<uint32_t>(src);
      TEST_EQUAL(appended, stored, ());
    }
  }

  FileWriter::DeleteFileX(fName);
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void CheckInvariant(FilesContainerR & reader, string const & tag, int64_t test)
|
||||
{
|
||||
FilesContainerR::TReader r = reader.GetReader(tag);
|
||||
TEST_EQUAL(test, ReadPrimitiveFromPos<int64_t>(r, 0), ());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
UNIT_TEST(FilesContainer_Shared)
{
  std::string const fName = "files_container.tmp";
  // Start from a clean state in case an earlier run left the file behind.
  FileWriter::DeleteFileX(fName);

  uint32_t const kCount = 10;
  int64_t const kTest64 = 908175281437210836LL;

  // Step 1: fill the container with two sections.
  {
    FilesContainerW writer(fName);

    // Section "5": a 32-bit zero header followed by the varints 0 .. kCount-1.
    auto w1 = writer.GetWriter("5");
    WriteToSink(w1, uint32_t(0));
    for (uint32_t value = 0; value < kCount; ++value)
      WriteVarUint(w1, value);
    w1->Flush();

    // Section "2": a single 64-bit marker value.
    auto w2 = writer.GetWriter("2");
    WriteToSink(w2, kTest64);
    w2->Flush();
  }

  // Step 2: read section "5" while a writer adds section "3" to the very same file.
  {
    FilesContainerR reader(fName);
    FilesContainerR::TReader r1 = reader.GetReader("5");
    // Skip the 32-bit header of section "5".
    uint64_t const headerSize = sizeof(uint32_t);
    r1 = r1.SubReader(headerSize, r1.Size() - headerSize);

    CheckInvariant(reader, "2", kTest64);

    FilesContainerW writer(fName, FileWriter::OP_WRITE_EXISTING);
    auto w = writer.GetWriter("3");

    // Copy the values from section "5" into section "3", checking each one on the way.
    ReaderSource<FilesContainerR::TReader> src(r1);
    for (uint32_t value = 0; value < kCount; ++value)
    {
      uint32_t stored = ReadVarUint<uint32_t>(src);
      TEST_EQUAL(stored, value, ());
      WriteVarUint(w, value);
    }
  }

  // Step 3: section "2" must be intact after the container was modified.
  {
    FilesContainerR reader(fName);
    CheckInvariant(reader, "2", kTest64);
  }

  FileWriter::DeleteFileX(fName);
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void ReplaceInContainer(string const & fName, char const * key, char const * value)
|
||||
{
|
||||
FilesContainerW writer(fName, FileWriter::OP_WRITE_EXISTING);
|
||||
auto w = writer.GetWriter(key);
|
||||
w->Write(value, strlen(value));
|
||||
}
|
||||
|
||||
void CheckContainer(string const & fName, char const * key[], char const * value[], size_t count)
|
||||
{
|
||||
FilesContainerR reader(fName);
|
||||
LOG(LINFO, ("Size=", reader.GetFileSize()));
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
FilesContainerR::TReader r = reader.GetReader(key[i]);
|
||||
|
||||
size_t const szBuffer = 100;
|
||||
size_t const szS = strlen(value[i]);
|
||||
|
||||
char s[szBuffer] = {0};
|
||||
ASSERT_LESS(szS, szBuffer, ());
|
||||
r.Read(0, s, szS);
|
||||
|
||||
TEST(strcmp(value[i], s) == 0, (s));
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/// Rewrites single sections of an existing container (the middle one, then the last one twice)
/// and checks after every step that all sections hold the expected data.
UNIT_TEST(FilesContainer_RewriteExisting)
{
  string const fName = "files_container.tmp";
  FileWriter::DeleteFileX(fName);
  // Remove the temporary file on every exit path, not only when all checks have passed.
  SCOPE_GUARD(deleteFileGuard, bind(&FileWriter::DeleteFileX, cref(fName)));

  char const * key[] = {"3", "2", "1"};
  char const * value[] = {"prolog", "data", "epilog"};
  size_t const count = ARRAY_SIZE(key);

  // fill container
  {
    FilesContainerW writer(fName);

    for (size_t i = 0; i < count; ++i)
    {
      auto w = writer.GetWriter(key[i]);
      w->Write(value[i], strlen(value[i]));
    }
  }

  // re-write middle file in container
  char const * buffer1 = "xxxxxxx";
  ReplaceInContainer(fName, key[1], buffer1);
  char const * value1[] = {value[0], buffer1, value[2]};
  CheckContainer(fName, key, value1, count);

  // re-write end file in container
  char const * buffer2 = "yyyyyyyyyyyyyy";
  ReplaceInContainer(fName, key[2], buffer2);
  char const * value2[] = {value[0], buffer1, buffer2};
  CheckContainer(fName, key, value2, count);

  // re-write end file in container once again
  char const * buffer3 = "zzz";
  ReplaceInContainer(fName, key[2], buffer3);
  char const * value3[] = {value[0], buffer1, buffer3};
  CheckContainer(fName, key, value3, count);
}
|
||||
|
||||
/// @todo To make this test work, need to review FilesContainerW::GetWriter logic.
|
||||
/*
|
||||
UNIT_TEST(FilesContainer_ConsecutiveRewriteExisting)
|
||||
{
|
||||
string const fName = "files_container.tmp";
|
||||
FileWriter::DeleteFileX(fName);
|
||||
|
||||
char const * key[] = { "3", "2", "1" };
|
||||
char const * value[] = { "prolog", "data", "epilog" };
|
||||
|
||||
// fill container
|
||||
{
|
||||
FilesContainerW writer(fName);
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(key); ++i)
|
||||
{
|
||||
auto w = writer.GetWriter(key[i]);
|
||||
w->Write(value[i], strlen(value[i]));
|
||||
}
|
||||
}
|
||||
|
||||
char const * buf0 = "xxx";
|
||||
char const * buf1 = "yyy";
|
||||
{
|
||||
FilesContainerW writer(fName, FileWriter::OP_WRITE_EXISTING);
|
||||
|
||||
{
|
||||
auto w = writer.GetWriter(key[0]);
|
||||
w->Write(buf0, strlen(buf0));
|
||||
}
|
||||
|
||||
{
|
||||
auto w = writer.GetWriter(key[1]);
|
||||
w->Write(buf1, strlen(buf1));
|
||||
}
|
||||
}
|
||||
|
||||
char const * values[] = { buf0, buf1, value[2] };
|
||||
CheckContainer(fName, key, values, 3);
|
||||
}
|
||||
*/
|
||||
|
||||
/// Checks Handle validity: a mapped handle is valid, a default-constructed one is not, and
/// Assign() moves the mapping from the source handle to the destination.
UNIT_TEST(FilesMappingContainer_Handle)
{
  string const fName = "files_container.tmp";
  string const tag = "dummy";

  // Remove the temporary file on every exit path, not only when all checks have passed.
  SCOPE_GUARD(deleteFileGuard, bind(&FileWriter::DeleteFileX, cref(fName)));

  {
    FilesContainerW writer(fName);
    auto w = writer.GetWriter(tag);
    w->Write(tag.c_str(), tag.size());
  }

  {
    FilesMappingContainer cont(fName);

    FilesMappingContainer::Handle h1 = cont.Map(tag);
    TEST(h1.IsValid(), ());

    FilesMappingContainer::Handle h2;
    TEST(!h2.IsValid(), ());

    // After the move-assignment the source must be invalid and the target valid.
    h2.Assign(std::move(h1));
    TEST(!h1.IsValid(), ());
    TEST(h2.IsValid(), ());
  }
}
|
||||
|
||||
/// Checks move construction of Handle, both directly and when the handle is moved into a
/// member of another object, repeated kNumMapTests times.
UNIT_TEST(FilesMappingContainer_MoveHandle)
{
  static uint8_t const kNumMapTests = 200;

  // Stores a handle moved into it and checks that the stored handle is valid.
  class HandleWrapper
  {
  public:
    explicit HandleWrapper(FilesMappingContainer::Handle && handle) : m_handle(std::move(handle))
    {
      TEST(m_handle.IsValid(), ());
    }

  private:
    FilesMappingContainer::Handle m_handle;
  };

  string const containerPath = "files_container.tmp";
  string const tagName = "dummy";

  SCOPE_GUARD(deleteContainerFileGuard, bind(&FileWriter::DeleteFileX, cref(containerPath)));

  {
    FilesContainerW container(containerPath);
    auto sink = container.GetWriter(tagName);
    sink->Write(tagName.c_str(), tagName.size());
  }

  {
    FilesMappingContainer cont(containerPath);

    FilesMappingContainer::Handle source = cont.Map(tagName);
    TEST(source.IsValid(), ());

    // Move construction transfers the mapping and invalidates the source.
    FilesMappingContainer::Handle target(std::move(source));
    TEST(target.IsValid(), ());
    TEST(!source.IsValid(), ());

    for (int attempt = 0; attempt < kNumMapTests; ++attempt)
    {
      FilesMappingContainer::Handle mapped = cont.Map(tagName);
      HandleWrapper wrapper(std::move(mapped));
    }
  }
}
|
||||
|
||||
/// Writes three sections of |count| raw uint32_t values each and reads them back through
/// memory mappings.
UNIT_TEST(FilesMappingContainer_Smoke)
{
  string const fName = "files_container.tmp";
  // Remove the temporary file on every exit path, not only when all checks have passed.
  SCOPE_GUARD(deleteFileGuard, bind(&FileWriter::DeleteFileX, cref(fName)));

  char const * key[] = {"3", "2", "1"};
  uint32_t const count = 1000000;

  // fill container
  {
    FilesContainerW writer(fName);

    for (size_t i = 0; i < ARRAY_SIZE(key); ++i)
    {
      auto w = writer.GetWriter(key[i]);
      for (uint32_t j = 0; j < count; ++j)
      {
        // Section i holds the sequence i, i + 1, ..., i + count - 1.
        uint32_t v = j + static_cast<uint32_t>(i);
        w->Write(&v, sizeof(v));
      }
    }
  }

  {
    FilesMappingContainer reader(fName);

    for (size_t i = 0; i < ARRAY_SIZE(key); ++i)
    {
      FilesMappingContainer::Handle h = reader.Map(key[i]);
      // Make sure the mapping really covers |count| values before walking through it.
      TEST(h.IsValid(), ());
      TEST_EQUAL(h.GetSize(), count * sizeof(uint32_t), ());

      uint32_t const * data = h.GetData<uint32_t>();

      for (uint32_t j = 0; j < count; ++j)
      {
        TEST_EQUAL(j + i, *data, ());
        ++data;
      }

      h.Unmap();
    }
  }
}
|
||||
|
||||
/// Maps sections whose sizes are one byte below, exactly at and one byte above the page size
/// and checks the size and the content of every mapping.
UNIT_TEST(FilesMappingContainer_PageSize)
{
  string const fName = "files_container.tmp";
  // Remove the temporary file on every exit path, not only when all checks have passed.
  SCOPE_GUARD(deleteFileGuard, bind(&FileWriter::DeleteFileX, cref(fName)));

  size_t const pageSize =
#ifndef OMIM_OS_WINDOWS
      sysconf(_SC_PAGESIZE);
#else
      4096;
#endif
  LOG(LINFO, ("Page size:", pageSize));

  char const * key[] = {"3", "2", "1"};
  char const byte[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'};
  size_t count[] = {pageSize - 1, pageSize, pageSize + 1};
  size_t const sz = ARRAY_SIZE(key);

  {
    FilesContainerW writer(fName);

    for (size_t i = 0; i < sz; ++i)
    {
      // Section i is count[i] bytes long and repeats the pattern "abcdefg".
      auto w = writer.GetWriter(key[i]);
      for (size_t j = 0; j < count[i]; ++j)
        w->Write(&byte[j % ARRAY_SIZE(byte)], 1);
    }
  }

  {
    FilesMappingContainer reader(fName);
    FilesMappingContainer::Handle handle[sz];

    // All sections are mapped first, so the mappings are alive at the same time.
    for (size_t i = 0; i < sz; ++i)
    {
      handle[i].Assign(reader.Map(key[i]));
      TEST_EQUAL(handle[i].GetSize(), count[i], ());
    }

    for (size_t i = 0; i < sz; ++i)
    {
      char const * data = handle[i].GetData<char>();
      for (size_t j = 0; j < count[i]; ++j)
        TEST_EQUAL(*data++, byte[j % ARRAY_SIZE(byte)], ());
    }
  }
}
|
||||
81
libs/coding/coding_tests/fixed_bits_ddvector_test.cpp
Normal file
81
libs/coding/coding_tests/fixed_bits_ddvector_test.cpp
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/fixed_bits_ddvector.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <initializer_list>
|
||||
#include <random>
|
||||
#include <utility>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
template <size_t Bits>
|
||||
void TestWithData(vector<uint32_t> const & lst)
|
||||
{
|
||||
using TVector = FixedBitsDDVector<Bits, MemReader>;
|
||||
using TBuffer = vector<uint8_t>;
|
||||
using TWriter = MemWriter<TBuffer>;
|
||||
|
||||
TBuffer buf;
|
||||
{
|
||||
TWriter writer(buf);
|
||||
typename TVector::template Builder<TWriter> builder(writer);
|
||||
|
||||
uint32_t optCount = 0;
|
||||
uint32_t const optBound = (1 << Bits) - 2;
|
||||
|
||||
for (uint32_t v : lst)
|
||||
{
|
||||
if (v < optBound)
|
||||
++optCount;
|
||||
|
||||
builder.PushBack(v);
|
||||
}
|
||||
|
||||
pair<uint32_t, uint32_t> expected(optCount, lst.size());
|
||||
TEST_EQUAL(builder.GetCount(), expected, ());
|
||||
}
|
||||
|
||||
MemReader reader(buf.data(), buf.size());
|
||||
auto const vec = TVector::Create(reader);
|
||||
|
||||
uint32_t i = 0;
|
||||
for (uint32_t actual : lst)
|
||||
{
|
||||
uint32_t expected;
|
||||
TEST(vec->Get(i, expected), ());
|
||||
TEST_EQUAL(expected, actual, ());
|
||||
++i;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/// Round-trips three small hand-written inputs through the 3-bit vector.
UNIT_TEST(FixedBitsDDVector_Smoke)
{
  vector<uint32_t> const smallValues = {0, 3, 6};
  vector<uint32_t> const largeValues = {7, 20, 50};
  vector<uint32_t> const mixedValues = {1, 0, 4, 30, 5, 3, 6, 7, 2, 8, 0};

  TestWithData<3>(smallValues);
  TestWithData<3>(largeValues);
  TestWithData<3>(mixedValues);
}
|
||||
|
||||
/// Round-trips the same 1000 pseudo-random values from [0, 1000] for bit widths 3 to 9.
UNIT_TEST(FixedBitsDDVector_Rand)
{
  size_t constexpr kMaxCount = 1000;

  default_random_engine gen;
  uniform_int_distribution<uint32_t> distribution(0, 1000);

  vector<uint32_t> v(kMaxCount);
  for (uint32_t & value : v)
    value = distribution(gen);

  // The bit width is a template parameter, so every width needs its own call.
  TestWithData<3>(v);
  TestWithData<4>(v);
  TestWithData<5>(v);
  TestWithData<6>(v);
  TestWithData<7>(v);
  TestWithData<8>(v);
  TestWithData<9>(v);
}
|
||||
196
libs/coding/coding_tests/geometry_coding_test.cpp
Normal file
196
libs/coding/coding_tests/geometry_coding_test.cpp
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/byte_stream.hpp"
|
||||
#include "coding/coding_tests/test_polylines.hpp"
|
||||
#include "coding/geometry_coding.hpp"
|
||||
#include "coding/point_coding.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "geometry/geometry_tests/large_polygon.hpp"
|
||||
#include "geometry/mercator.hpp"
|
||||
#include "geometry/parametrized_segment.hpp"
|
||||
#include "geometry/simplification.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
using namespace coding;
|
||||
using namespace std;
|
||||
|
||||
using PU = m2::PointU;
|
||||
using PD = m2::PointD;
|
||||
|
||||
namespace
|
||||
{
|
||||
m2::PointU D2U(m2::PointD const & p)
|
||||
{
|
||||
return PointDToPointU(p, kPointCoordBits);
|
||||
}
|
||||
|
||||
m2::PointU GetMaxPoint()
|
||||
{
|
||||
return D2U(m2::PointD(mercator::Bounds::kMaxX, mercator::Bounds::kMaxY));
|
||||
}
|
||||
|
||||
void TestPolylineEncode(string testName, vector<m2::PointU> const & points, m2::PointU const & maxPoint,
|
||||
void (*fnEncode)(InPointsT const & points, m2::PointU const & basePoint,
|
||||
m2::PointU const & maxPoint, OutDeltasT & deltas),
|
||||
void (*fnDecode)(InDeltasT const & deltas, m2::PointU const & basePoint,
|
||||
m2::PointU const & maxPoint, OutPointsT & points))
|
||||
{
|
||||
size_t const count = points.size();
|
||||
if (count == 0)
|
||||
return;
|
||||
|
||||
m2::PointU const basePoint = m2::PointU::Zero();
|
||||
|
||||
vector<uint64_t> deltas;
|
||||
deltas.resize(count);
|
||||
|
||||
OutDeltasT deltasA(deltas);
|
||||
fnEncode(make_read_adapter(points), basePoint, maxPoint, deltasA);
|
||||
|
||||
vector<m2::PointU> decodedPoints;
|
||||
decodedPoints.resize(count);
|
||||
|
||||
OutPointsT decodedPointsA(decodedPoints);
|
||||
fnDecode(make_read_adapter(deltas), basePoint, maxPoint, decodedPointsA);
|
||||
|
||||
TEST_EQUAL(points, decodedPoints, ());
|
||||
|
||||
if (points.size() > 10)
|
||||
{
|
||||
vector<char> data;
|
||||
MemWriter<vector<char>> writer(data);
|
||||
|
||||
for (size_t i = 0; i != deltas.size(); ++i)
|
||||
WriteVarUint(writer, deltas[i]);
|
||||
|
||||
LOG(LINFO, (testName, points.size(), data.size()));
|
||||
}
|
||||
}
|
||||
|
||||
vector<m2::PointU> SimplifyPoints(vector<m2::PointU> const & points, double eps)
|
||||
{
|
||||
vector<m2::PointU> simpPoints;
|
||||
SimplifyDefault(points.begin(), points.end(), eps, simpPoints);
|
||||
return simpPoints;
|
||||
}
|
||||
|
||||
void TestEncodePolyline(string name, m2::PointU maxPoint, vector<m2::PointU> const & points)
|
||||
{
|
||||
TestPolylineEncode(name + "1", points, maxPoint, &EncodePolylinePrev1, &DecodePolylinePrev1);
|
||||
TestPolylineEncode(name + "2", points, maxPoint, &EncodePolylinePrev2, &DecodePolylinePrev2);
|
||||
TestPolylineEncode(name + "3", points, maxPoint, &EncodePolylinePrev3, &DecodePolylinePrev3);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/// For every delta in [-100, 100] x [-100, 100] around the prediction (100, 100) checks that
/// the encoded delta decodes back to the original point and that its varint form has the
/// expected length.
UNIT_TEST(EncodePointDeltaAsUint)
{
  PU const pred(100, 100);

  for (int x = -100; x <= 100; ++x)
  {
    for (int y = -100; y <= 100; ++y)
    {
      PU const orig(100 + x, 100 + y);
      auto const encoded = EncodePointDeltaAsUint(orig, pred);
      TEST_EQUAL(orig, DecodePointDeltaFromUint(encoded, pred), ());

      vector<char> data;
      PushBackByteSink<vector<char>> sink(data);
      WriteVarUint(sink, encoded);

      // One byte for the smallest deltas, two for medium ones and three for the rest.
      size_t expectedSize = 1;
      if (x < -64 || x >= 64 || y < -64 || y >= 64)
        expectedSize = 3;
      else if (x < -8 || x >= 8 || y < -4 || y >= 4)
        expectedSize = 2;

      TEST_EQUAL(data.size(), expectedSize, (x, y));
    }
  }
}
|
||||
|
||||
/// Checks the two-point polyline predictor, including clamping of the prediction to the
/// [0, maxPoint] range.
UNIT_TEST(PredictPointsInPolyline2)
{
  // Ci = Ci-1 + (Ci-1 - Ci-2) / 2
  // E.g. x: 4 + (4 - 1) / 2 = 5.5 and y: 4 + (4 - 2) / 2 = 5; the expected result is (5, 5).
  TEST_EQUAL(PU(5, 5), PredictPointInPolyline(PD(8, 7), PU(4, 4), PU(1, 2)), ());

  // Clamp max
  TEST_EQUAL(PU(4, 4), PredictPointInPolyline(PD(4, 4), PU(4, 4), PU(1, 2)), ());
  TEST_EQUAL(PU(5, 5), PredictPointInPolyline(PD(5, 5), PU(4, 4), PU(1, 2)), ());

  // Clamp 0
  // y: 1 + (1 - 4) / 2 is negative and has to be clamped to 0.
  TEST_EQUAL(PU(4, 0), PredictPointInPolyline(PD(5, 5), PU(4, 1), PU(4, 4)), ());
}
|
||||
|
||||
/// Checks the three-point triangle predictor, including clamping to the [0, maxPoint] range.
UNIT_TEST(PredictPointsInTriangle)
{
  PU const p1(1, 0);
  PU const p2(0, 1);
  PD const farMax(100, 100);

  // Ci = Ci-1 + Ci-2 - Ci-3
  TEST_EQUAL(PU(1, 1), PredictPointInTriangle(farMax, p1, p2, PU(0, 0)), ());

  // Clamp 0
  TEST_EQUAL(PU(0, 0), PredictPointInTriangle(farMax, p1, p2, PU(5, 5)), ());

  // Clamp max
  PD const nearMax(10, 10);
  TEST_EQUAL(PU(10, 10), PredictPointInTriangle(nearMax, PU(8, 7), PU(6, 5), PU(1, 1)), ());
}
|
||||
|
||||
/*
|
||||
UNIT_TEST(PredictPointsInPolyline3_Square)
|
||||
{
|
||||
TEST_EQUAL(PU(5, 1), PredictPointInPolyline(PU(6, 6), PU(5, 4), PU(2, 4), PU(2, 1)), ());
|
||||
TEST_EQUAL(PU(5, 3), PredictPointInPolyline(PU(6, 6), PU(4, 1), PU(2, 2), PU(3, 4)), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(PredictPointsInPolyline3_SquareClamp0)
|
||||
{
|
||||
TEST_EQUAL(PU(5, 1), PredictPointInPolyline(PU(6, 6), PU(5, 4), PU(2, 4), PU(2, 1)), ());
|
||||
TEST_EQUAL(PU(4, 0), PredictPointInPolyline(PU(6, 6), PU(2, 0), PU(3, 2), PU(5, 1)), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(PredictPointsInPolyline3_90deg)
|
||||
{
|
||||
TEST_EQUAL(PU(3, 2), PredictPointInPolyline(PU(8, 8), PU(3, 6), PU(1, 6), PU(1, 5)), ());
|
||||
}
|
||||
*/
|
||||
|
||||
/// Round-trips prefixes of LargePolygon::kLargePolygon (0 to 4 points and the whole polygon),
/// both as is and after simplification with a range of thresholds.
UNIT_TEST(EncodePolyline)
{
  struct Simplification
  {
    char const * m_name;
    double m_epsilon;
  };
  Simplification const kSimplifications[] = {
      {"1simp", 1.0},     {"2simp", 2.0},       {"4simp", 4.0},       {"10simp", 10.0},    {"100simp", 100.0},
      {"500simp", 500.0}, {"1000simp", 1000.0}, {"2000simp", 2000.0}, {"4000simp", 4000.0}};

  size_t const kSizes[] = {0, 1, 2, 3, 4, ARRAY_SIZE(LargePolygon::kLargePolygon)};
  m2::PointU const maxPoint(1000000000, 1000000000);

  for (size_t const polygonSize : kSizes)
  {
    vector<m2::PointU> points;
    points.reserve(polygonSize);
    for (size_t i = 0; i < polygonSize; ++i)
    {
      auto const & src = LargePolygon::kLargePolygon[i];
      // Scale to integers; y is shifted by 200 before scaling.
      points.push_back(
          m2::PointU(static_cast<uint32_t>(src.x * 10000), static_cast<uint32_t>((src.y + 200) * 10000)));
    }

    TestEncodePolyline("Unsimp", maxPoint, points);
    for (Simplification const & s : kSimplifications)
      TestEncodePolyline(s.m_name, maxPoint, SimplifyPoints(points, s.m_epsilon));
  }
}
|
||||
|
||||
// see 476c1d1d125f0c2deb8c commit for special decode test
|
||||
|
||||
/// Round-trips the geometry_coding_tests::arr1 polyline through EncodePolyline / DecodePolyline.
UNIT_TEST(DecodeEncodePolyline_DataSet1)
{
  vector<m2::PointU> points;
  points.reserve(ARRAY_SIZE(geometry_coding_tests::arr1));
  for (auto const & p : geometry_coding_tests::arr1)
    points.push_back(D2U(p));

  TestPolylineEncode("DataSet1", points, GetMaxPoint(), &EncodePolyline, &DecodePolyline);
}
|
||||
65
libs/coding/coding_tests/geometry_serialization_test.cpp
Normal file
65
libs/coding/coding_tests/geometry_serialization_test.cpp
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/byte_stream.hpp"
|
||||
#include "coding/coding_tests/test_polylines.hpp"
|
||||
#include "coding/geometry_coding.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/math.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Copy-Paste from generator/feature_builder.cpp
|
||||
namespace
|
||||
{
|
||||
bool IsEqual(double d1, double d2)
|
||||
{
|
||||
return AlmostEqualAbs(d1, d2, kMwmPointAccuracy);
|
||||
}
|
||||
|
||||
bool IsEqual(m2::PointD const & p1, m2::PointD const & p2)
|
||||
{
|
||||
return p1.EqualDxDy(p2, kMwmPointAccuracy);
|
||||
}
|
||||
|
||||
bool IsEqual(m2::RectD const & r1, m2::RectD const & r2)
|
||||
{
|
||||
return (IsEqual(r1.minX(), r2.minX()) && IsEqual(r1.minY(), r2.minY()) && IsEqual(r1.maxX(), r2.maxX()) &&
|
||||
IsEqual(r1.maxY(), r2.maxY()));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/// Saves the arr1 polyline with serial::SaveOuterPath, loads it back with
/// serial::LoadOuterPath and checks that the points and their bounding rects are equal within
/// kMwmPointAccuracy.
UNIT_TEST(SaveLoadPolyline_DataSet1)
{
  using namespace geometry_coding_tests;

  vector<m2::PointD> const original(arr1, arr1 + ARRAY_SIZE(arr1));

  vector<char> buffer;
  PushBackByteSink<vector<char>> sink(buffer);

  serial::GeometryCodingParams cp;
  serial::SaveOuterPath(original, cp, sink);

  vector<m2::PointD> restored;
  ArrayByteSource source(buffer.data());
  serial::LoadOuterPath(source, cp, restored);

  TEST_EQUAL(original.size(), restored.size(), ());

  m2::RectD originalRect;
  m2::RectD restoredRect;
  for (size_t i = 0; i < original.size(); ++i)
  {
    m2::PointD const & before = original[i];
    m2::PointD const & after = restored[i];

    originalRect.Add(before);
    restoredRect.Add(after);

    TEST(IsEqual(before, after), (before, after));
  }

  TEST(IsEqual(originalRect, restoredRect), (originalRect, restoredRect));
}
|
||||
51
libs/coding/coding_tests/hex_test.cpp
Normal file
51
libs/coding/coding_tests/hex_test.cpp
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/hex.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/// Checks ToHex and FromHex against a fixed pair of raw bytes and their hex representation.
UNIT_TEST(GoldenRecode)
{
  string const raw("\x01\x23\x45\x67\x89\xAB\xCD\xEF");
  string const hex("0123456789ABCDEF");

  TEST_EQUAL(ToHex(raw), hex, ());
  TEST_EQUAL(raw, FromHex(hex), ());
}
|
||||
|
||||
/// Round-trips 256 pseudo-random strings of 1 to 20 upper-case letters through ToHex / FromHex.
UNIT_TEST(RandomRecode)
{
  mt19937 rng(0);
  for (size_t iteration = 0; iteration < 256; ++iteration)
  {
    // The length is drawn first, then one letter per position.
    size_t const length = 1 + (rng() % 20);
    string data(length, '\0');
    for (char & c : data)
      c = static_cast<char>(rng() % 26) + 'A';

    TEST_EQUAL(data, FromHex(ToHex(data)), ());
  }
}
|
||||
|
||||
/// NumToHex of a 64-bit value must give its 16 upper-case hex digits.
UNIT_TEST(EncodeNumber)
{
  uint64_t const number = 0x0123456789ABCDEFULL;
  TEST_EQUAL(NumToHex(number), "0123456789ABCDEF", ());
}
|
||||
|
||||
/// FromHex must accept lower-case hex digits.
UNIT_TEST(DecodeLowerCaseHex)
{
  string const decoded = FromHex("fe");
  TEST_EQUAL(decoded, "\xfe", ());
}
|
||||
|
||||
/// ToHex of an empty string must be an empty string.
UNIT_TEST(EncodeEmptyString)
{
  string const empty;
  TEST_EQUAL(ToHex(empty), "", ());
}
|
||||
|
||||
/// FromHex of an empty string must be an empty string.
UNIT_TEST(DecodeEmptyString)
{
  string const decoded = FromHex("");
  TEST_EQUAL(decoded, "", ());
}
|
||||
133
libs/coding/coding_tests/huffman_test.cpp
Normal file
133
libs/coding/coding_tests/huffman_test.cpp
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/huffman.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
vector<strings::UniString> MakeUniStringVector(vector<string> const & v)
|
||||
{
|
||||
vector<strings::UniString> result(v.size());
|
||||
for (size_t i = 0; i < v.size(); ++i)
|
||||
result[i] = strings::MakeUniString(v[i]);
|
||||
return result;
|
||||
}
|
||||
|
||||
void TestDecode(coding::HuffmanCoder const & h, uint32_t bits, uint32_t len, uint32_t expected)
|
||||
{
|
||||
coding::HuffmanCoder::Code code(bits, len);
|
||||
uint32_t received;
|
||||
TEST(h.Decode(code, received), ("Could not decode", code.bits, "( length", code.len, ")"));
|
||||
TEST_EQUAL(expected, received, ());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace coding
|
||||
{
|
||||
UNIT_TEST(Huffman_Smoke)
|
||||
{
|
||||
HuffmanCoder h;
|
||||
h.Init(MakeUniStringVector(vector<string>{"ab", "ac"}));
|
||||
|
||||
TestDecode(h, 0, 1, static_cast<uint32_t>('a')); // 0
|
||||
TestDecode(h, 1, 2, static_cast<uint32_t>('b')); // 10
|
||||
TestDecode(h, 3, 2, static_cast<uint32_t>('c')); // 11
|
||||
}
|
||||
|
||||
UNIT_TEST(Huffman_OneSymbol)
|
||||
{
|
||||
HuffmanCoder h;
|
||||
h.Init(MakeUniStringVector(vector<string>{string(5, 0)}));
|
||||
|
||||
TestDecode(h, 0, 0, 0);
|
||||
}
|
||||
|
||||
UNIT_TEST(Huffman_NonAscii)
|
||||
{
|
||||
HuffmanCoder h;
|
||||
string const data = "2πΩ";
|
||||
strings::UniString const uniData = strings::MakeUniString(data);
|
||||
h.Init(vector<strings::UniString>{uniData});
|
||||
|
||||
TestDecode(h, 0, 2, static_cast<uint32_t>(uniData[0])); // 00
|
||||
TestDecode(h, 1, 1, static_cast<uint32_t>(uniData[1])); // 1
|
||||
TestDecode(h, 2, 2, static_cast<uint32_t>(uniData[2])); // 01
|
||||
}
|
||||
|
||||
UNIT_TEST(Huffman_Init)
|
||||
{
|
||||
HuffmanCoder h;
|
||||
h.Init(MakeUniStringVector(vector<string>{"ab"}));
|
||||
|
||||
vector<uint8_t> buf;
|
||||
buf.push_back(16); // size
|
||||
buf.push_back(105); // 01101001
|
||||
buf.push_back(150); // 10010110
|
||||
|
||||
MemReader memReader(&buf[0], buf.size());
|
||||
ReaderSource<MemReader> reader(memReader);
|
||||
strings::UniString received = h.ReadAndDecode(reader);
|
||||
strings::UniString expected = strings::MakeUniString("baababbaabbabaab");
|
||||
|
||||
TEST_EQUAL(expected, received, ());
|
||||
}
|
||||
|
||||
UNIT_TEST(Huffman_Serialization_Encoding)
|
||||
{
|
||||
HuffmanCoder hW;
|
||||
hW.Init(MakeUniStringVector(vector<string>{"aaaaaaaaaa", "bbbbbbbbbb", "ccccc", "ddddd"})); // 10, 10, 5, 5
|
||||
vector<uint8_t> buf;
|
||||
MemWriter<vector<uint8_t>> writer(buf);
|
||||
hW.WriteEncoding(writer);
|
||||
|
||||
HuffmanCoder hR;
|
||||
MemReader memReader(&buf[0], buf.size());
|
||||
ReaderSource<MemReader> reader(memReader);
|
||||
hR.ReadEncoding(reader);
|
||||
|
||||
TEST_EQUAL(reader.Pos(), writer.Pos(), ());
|
||||
|
||||
TestDecode(hW, 0, 2, static_cast<uint32_t>('a')); // 00
|
||||
TestDecode(hW, 2, 2, static_cast<uint32_t>('b')); // 01
|
||||
TestDecode(hW, 1, 2, static_cast<uint32_t>('c')); // 10
|
||||
TestDecode(hW, 3, 2, static_cast<uint32_t>('d')); // 11
|
||||
|
||||
TestDecode(hR, 0, 2, static_cast<uint32_t>('a'));
|
||||
TestDecode(hR, 2, 2, static_cast<uint32_t>('b'));
|
||||
TestDecode(hR, 1, 2, static_cast<uint32_t>('c'));
|
||||
TestDecode(hR, 3, 2, static_cast<uint32_t>('d'));
|
||||
}
|
||||
|
||||
UNIT_TEST(Huffman_Serialization_Data)
|
||||
{
|
||||
HuffmanCoder hW;
|
||||
hW.Init(MakeUniStringVector(vector<string>{"aaaaaaaaaa", "bbbbbbbbbb", "ccccc", "ddddd"})); // 10, 10, 5, 5
|
||||
vector<uint8_t> buf;
|
||||
|
||||
string const data = "abacabaddddaaabbcabacabadbabd";
|
||||
strings::UniString expected = strings::UniString(data.begin(), data.end());
|
||||
|
||||
MemWriter<vector<uint8_t>> writer(buf);
|
||||
hW.WriteEncoding(writer);
|
||||
hW.EncodeAndWrite(writer, expected);
|
||||
|
||||
HuffmanCoder hR;
|
||||
MemReader memReader(&buf[0], buf.size());
|
||||
ReaderSource<MemReader> reader(memReader);
|
||||
hR.ReadEncoding(reader);
|
||||
strings::UniString received = hR.ReadAndDecode(reader);
|
||||
|
||||
TEST_EQUAL(expected, received, ());
|
||||
}
|
||||
|
||||
} // namespace coding
|
||||
108
libs/coding/coding_tests/map_uint32_to_val_tests.cpp
Normal file
108
libs/coding/coding_tests/map_uint32_to_val_tests.cpp
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/map_uint32_to_val.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace map_uint32_tests
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
using BufferT = vector<uint8_t>;
|
||||
using ValuesT = vector<uint32_t>;
|
||||
using BuilderT = MapUint32ToValueBuilder<uint32_t>;
|
||||
using MapT = MapUint32ToValue<uint32_t>;
|
||||
|
||||
UNIT_TEST(MapUint32Val_Small)
|
||||
{
|
||||
{
|
||||
BuilderT builder;
|
||||
BufferT buffer;
|
||||
MemWriter writer(buffer);
|
||||
builder.Freeze(writer, [](Writer &, auto, auto) {});
|
||||
|
||||
LOG(LINFO, ("Empty map size =", buffer.size()));
|
||||
|
||||
MemReader reader(buffer.data(), buffer.size());
|
||||
auto map = MapT::Load(reader, [](NonOwningReaderSource &, uint32_t, ValuesT &) {});
|
||||
|
||||
TEST_EQUAL(map->Count(), 0, ());
|
||||
uint32_t dummy;
|
||||
TEST(!map->Get(1, dummy), ());
|
||||
}
|
||||
|
||||
{
|
||||
BuilderT builder;
|
||||
builder.Put(1, 777);
|
||||
BufferT buffer;
|
||||
MemWriter writer(buffer);
|
||||
builder.Freeze(writer, [](Writer & writer, auto b, auto e)
|
||||
{
|
||||
WriteVarUint(writer, *b++);
|
||||
TEST(b == e, ());
|
||||
});
|
||||
|
||||
MemReader reader(buffer.data(), buffer.size());
|
||||
auto map = MapT::Load(reader, [](NonOwningReaderSource & source, uint32_t blockSize, ValuesT & values)
|
||||
{
|
||||
TEST_EQUAL(blockSize, 1, ("GetThreadsafe should pass optimal blockSize"));
|
||||
while (source.Size() > 0)
|
||||
values.push_back(ReadVarUint<uint32_t>(source));
|
||||
TEST_EQUAL(values.size(), 1, ());
|
||||
});
|
||||
|
||||
TEST_EQUAL(map->Count(), 1, ());
|
||||
uint32_t val;
|
||||
TEST(map->GetThreadsafe(1, val), ());
|
||||
TEST_EQUAL(val, 777, ());
|
||||
}
|
||||
}
|
||||
|
||||
UNIT_TEST(MapUint32Val_Smoke)
|
||||
{
|
||||
vector<pair<uint32_t, uint32_t>> data;
|
||||
size_t const dataSize = 227;
|
||||
data.resize(dataSize);
|
||||
for (size_t i = 0; i < data.size(); ++i)
|
||||
data[i] = make_pair(static_cast<uint32_t>(i), static_cast<uint32_t>(i));
|
||||
|
||||
BufferT buffer;
|
||||
{
|
||||
BuilderT builder;
|
||||
for (auto const & d : data)
|
||||
builder.Put(d.first, d.second);
|
||||
|
||||
MemWriter writer(buffer);
|
||||
builder.Freeze(writer, [](Writer & w, BuilderT::Iter begin, BuilderT::Iter end)
|
||||
{
|
||||
for (auto it = begin; it != end; ++it)
|
||||
WriteToSink(w, *it);
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
MemReader reader(buffer.data(), buffer.size());
|
||||
auto table = MapUint32ToValue<uint32_t>::Load(
|
||||
reader, [](NonOwningReaderSource & source, uint32_t blockSize, ValuesT & values)
|
||||
{
|
||||
values.reserve(blockSize);
|
||||
while (source.Size() > 0)
|
||||
values.push_back(ReadPrimitiveFromSource<uint32_t>(source));
|
||||
});
|
||||
TEST(table.get(), ());
|
||||
|
||||
for (auto const & d : data)
|
||||
{
|
||||
uint32_t res;
|
||||
TEST(table->Get(d.first, res), ());
|
||||
TEST_EQUAL(res, d.second, ());
|
||||
TEST(table->GetThreadsafe(d.first, res), ());
|
||||
TEST_EQUAL(res, d.second, ());
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace map_uint32_tests
|
||||
48
libs/coding/coding_tests/mem_file_reader_test.cpp
Normal file
48
libs/coding/coding_tests/mem_file_reader_test.cpp
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
/// Reads tails of a 4-byte buffer (the literal "123" with its terminating zero) from every
/// position, directly and through a sub-reader, and checks that only the requested bytes
/// were written into the destination.
UNIT_TEST(MemReaderSimple)
{
  char constexpr data[] = "123";
  size_t constexpr n = ARRAY_SIZE(data);
  MemReader const memReader(data, n);

  for (size_t pos = 0; pos < n; ++pos)
  {
    // Bytes that were not overwritten by the read keep the 0xff filler.
    uint8_t c[4] = {0xff, 0xff, 0xff, 0xff};
    ReadFromPos(memReader, pos, c, n - pos);
    for (size_t j = 0; j < n; ++j)
    {
      auto const expected = pos + j < n ? data[pos + j] : uint8_t(0xff);
      TEST_EQUAL(c[j], expected, (pos, j, n));
    }
  }

  // The sub-reader covers n - 2 bytes starting from the offset 1.
  MemReader const subReader = memReader.SubReader(1, n - 2);
  for (size_t pos = 1; pos < n - 1; ++pos)
  {
    uint8_t c[4] = {0xff, 0xff, 0xff, 0xff};
    ReadFromPos(subReader, pos, c, n - pos - 2);
    for (size_t j = 0; j < n; ++j)
    {
      auto const expected = pos + j < n - 2 ? data[pos + j + 1] : uint8_t(0xff);
      TEST_EQUAL(c[j], expected, (pos, j, n));
    }
  }
}
|
||||
|
||||
/// Same scenario as MemReaderSimple, with a reader constructed from a std::string_view and a
/// zero-filled destination buffer.
UNIT_TEST(MemReaderStringView)
{
  std::string_view constexpr data = "1234567";
  size_t constexpr n = data.size();
  MemReader const memReader(data);

  for (size_t pos = 0; pos < n; ++pos)
  {
    // Bytes that were not overwritten by the read stay zero.
    uint8_t c[n] = {0};
    ReadFromPos(memReader, pos, c, n - pos);
    for (size_t j = 0; j < n; ++j)
    {
      auto const expected = pos + j < n ? data[pos + j] : uint8_t{0};
      TEST_EQUAL(c[j], expected, (pos, j, n));
    }
  }

  // The sub-reader covers n - 2 bytes starting from the offset 1.
  MemReader const subReader = memReader.SubReader(1, n - 2);
  for (size_t pos = 1; pos < n - 1; ++pos)
  {
    uint8_t c[n] = {0};
    ReadFromPos(subReader, pos, c, n - pos - 2);
    for (size_t j = 0; j < n; ++j)
    {
      auto const expected = pos + j < n - 2 ? data[pos + j + 1] : uint8_t{0};
      TEST_EQUAL(c[j], expected, (pos, j, n));
    }
  }
}
|
||||
30
libs/coding/coding_tests/mem_file_writer_test.cpp
Normal file
30
libs/coding/coding_tests/mem_file_writer_test.cpp
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/file_writer.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
// A MemWriter that is created and destroyed without any Write() call must leave the buffer empty.
UNIT_TEST(MemWriterEmpty)
{
  using Buffer = std::vector<char>;

  Buffer data;
  {
    // The writer lives only inside this scope; nothing is written through it.
    MemWriter<Buffer> writer(data);
  }

  TEST(data.empty(), (data));
}
|
||||
|
||||
// Several consecutive Write() calls must be appended to the underlying buffer in order.
UNIT_TEST(MemWriterSimple)
{
  std::vector<char> data;
  MemWriter<std::vector<char>> writer(data);
  writer.Write("Hello", 5);
  writer.Write(",", 1);
  writer.Write("world!", 6);

  char const expected[] = "Hello,world!";
  // The terminating zero of |expected| is not a part of the written data.
  TEST_EQUAL(data.size(), ARRAY_SIZE(expected) - 1, ());
  // Qualified call: this file has no using-directive for std, so the unqualified
  // name was found only through argument-dependent lookup.
  TEST(std::equal(data.begin(), data.end(), &expected[0]), (data));
}
|
||||
32
libs/coding/coding_tests/move_to_front_tests.cpp
Normal file
32
libs/coding/coding_tests/move_to_front_tests.cpp
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/move_to_front.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
using namespace coding;
|
||||
|
||||
namespace
|
||||
{
|
||||
UNIT_TEST(MoveToFront_Smoke)
|
||||
{
|
||||
MoveToFront mtf;
|
||||
for (size_t i = 0; i < 256; ++i)
|
||||
TEST_EQUAL(mtf[i], i, ());
|
||||
|
||||
// Initially 3 should be on the 3rd position.
|
||||
TEST_EQUAL(mtf.Transform(3), 3, ());
|
||||
|
||||
// After the first transform, 3 should be moved to the 0th position.
|
||||
TEST_EQUAL(mtf.Transform(3), 0, ());
|
||||
TEST_EQUAL(mtf.Transform(3), 0, ());
|
||||
TEST_EQUAL(mtf.Transform(3), 0, ());
|
||||
|
||||
TEST_EQUAL(mtf[0], 3, ());
|
||||
TEST_EQUAL(mtf[1], 0, ());
|
||||
TEST_EQUAL(mtf[2], 1, ());
|
||||
TEST_EQUAL(mtf[3], 2, ());
|
||||
for (size_t i = 4; i < 256; ++i)
|
||||
TEST_EQUAL(mtf[i], i, ());
|
||||
}
|
||||
} // namespace
|
||||
45
libs/coding/coding_tests/png_decoder_test.cpp
Normal file
45
libs/coding/coding_tests/png_decoder_test.cpp
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Loads the whole content of the file |filename| into |buffer| as raw bytes.
//
// |buffer| is left empty when the file can not be opened, its size can not be
// determined, or it is empty. When fewer bytes than the file size are read,
// |buffer| is shrunk to the number of bytes that were actually read.
void loadFile(std::vector<unsigned char> & buffer, std::string const & filename)
{
  buffer.clear();

  std::ifstream file(filename, std::ios::in | std::ios::binary);
  if (!file)
    return;

  // The file size is the distance between the end and the beginning positions.
  if (!file.seekg(0, std::ios::end))
    return;
  std::streamoff const endPos = file.tellg();

  if (!file.seekg(0, std::ios::beg))
    return;
  std::streamoff const begPos = file.tellg();

  std::streamsize const size = endPos - begPos;
  if (size <= 0)
    return;

  buffer.resize(static_cast<std::size_t>(size));
  file.read(reinterpret_cast<char *>(buffer.data()), size);

  // Do not expose bytes that were not filled in by a short read.
  buffer.resize(static_cast<std::size_t>(file.gcount()));
}
|
||||
|
||||
// NOTE: the whole body of this test is commented out, so it currently checks nothing.
// The disabled code is kept below as it was.
UNIT_TEST(PngDecode)
{
  //  // load and decode
  //  vector<unsigned char> buffer, image;
  //  loadFile(buffer, "../../data/font_0.png");
  //  unsigned long w, h;
  //  int error = DecodePNG(image, w, h, buffer.empty() ? 0 : &buffer[0], (unsigned long)buffer.size());
  //
  //  // if there's an error, display it
  //  TEST_EQUAL(error, 0, ());
  //  // the pixels are now in the vector "image", use it as texture, draw it, ...
  //  TEST_GREATER(image.size(), 4, ("Image is empty???"));
  //  TEST_EQUAL(w, 1024, ());
  //  TEST_EQUAL(h, 1024, ());
}
|
||||
200
libs/coding/coding_tests/point_coding_tests.cpp
Normal file
200
libs/coding/coding_tests/point_coding_tests.cpp
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/coding_tests/test_polylines.hpp"
|
||||
|
||||
#include "coding/point_coding.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
#include "geometry/point2d.hpp"
|
||||
#include "geometry/rect2d.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/math.hpp"
|
||||
|
||||
#include <cmath>
|
||||
#include <random>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
double const kEps = kMwmPointAccuracy;
|
||||
uint8_t const kCoordBits = kPointCoordBits;
|
||||
uint32_t const kBig = uint32_t{1} << 30;
|
||||
|
||||
void CheckEqualPoints(m2::PointD const & p1, m2::PointD const & p2)
|
||||
{
|
||||
TEST(p1.EqualDxDy(p2, kEps), (p1, p2));
|
||||
|
||||
TEST_GREATER_OR_EQUAL(p1.x, -180.0, ());
|
||||
TEST_GREATER_OR_EQUAL(p1.y, -180.0, ());
|
||||
TEST_LESS_OR_EQUAL(p1.x, 180.0, ());
|
||||
TEST_LESS_OR_EQUAL(p1.y, 180.0, ());
|
||||
|
||||
TEST_GREATER_OR_EQUAL(p2.x, -180.0, ());
|
||||
TEST_GREATER_OR_EQUAL(p2.y, -180.0, ());
|
||||
TEST_LESS_OR_EQUAL(p2.x, 180.0, ());
|
||||
TEST_LESS_OR_EQUAL(p2.y, 180.0, ());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Informational test: finds the largest power-of-ten shift that does not change the encoded
// corner points and logs the coordinate delta produced by a single step of the integer grid.
UNIT_TEST(PointDToPointU_Epsilons)
{
  m2::PointD const arrPt[] = {{-180, -180}, {-180, 180}, {180, 180}, {180, -180}};
  // Per-corner directions; each one points to the inside of the square.
  m2::PointD const arrD[] = {{1, 1}, {1, -1}, {-1, -1}, {-1, 1}};
  size_t const count = ARRAY_SIZE(arrPt);

  // True when every corner shifted by |shift| along its direction is encoded
  // to the same PointU as the corner itself.
  auto const isShiftInvisible = [&](double shift)
  {
    for (size_t i = 0; i < count; ++i)
    {
      m2::PointU const corner = PointDToPointU(arrPt[i].x, arrPt[i].y, kCoordBits);
      m2::PointU const moved =
          PointDToPointU(arrPt[i].x + arrD[i].x * shift, arrPt[i].y + arrD[i].y * shift, kCoordBits);
      if (corner != moved)
        return false;
    }
    return true;
  };

  double eps = 1.0;
  while (!isShiftInvisible(eps))
    eps *= 0.1;

  LOG(LINFO, ("Epsilon (relative error) =", eps));

  for (size_t i = 0; i < count; ++i)
  {
    m2::PointU const p1 = PointDToPointU(arrPt[i].x, arrPt[i].y, kCoordBits);
    // One integer step from the corner along its direction.
    m2::PointU const p2(p1.x + arrD[i].x, p1.y + arrD[i].y);
    m2::PointD const p3 = PointUToPointD(p2, kCoordBits);

    LOG(LINFO, ("Dx =", p3.x - arrPt[i].x, "Dy =", p3.y - arrPt[i].y));
  }
}
|
||||
|
||||
// Encodes random points inside limit rects of different sizes and positions and checks
// that decoding restores each point with kEps accuracy.
UNIT_TEST(PointDToPointU_WithLimitRect)
{
  // Fixed seed keeps the generated points reproducible.
  mt19937 rng(0);

  m2::PointD const limitRectOrigin[] = {{0.0, 0.0}, {10.0, 10.0}, {90.0, 90.0}, {160.0, 160.0}};
  double const limitRectSize[] = {0.1, 1.0, 5.0, 10.0, 20.0};
  size_t const pointsPerRect = 100;

  for (auto const & origin : limitRectOrigin)
  {
    for (auto const sizeX : limitRectSize)
    {
      for (auto const sizeY : limitRectSize)
      {
        m2::RectD const limitRect(origin.x, origin.y, origin.x + sizeX, origin.y + sizeY);
        auto distX = uniform_real_distribution<double>(limitRect.minX(), limitRect.maxX());
        auto distY = uniform_real_distribution<double>(limitRect.minY(), limitRect.maxY());
        auto const coordBits = GetCoordBits(limitRect, kEps);
        TEST_NOT_EQUAL(coordBits, 0, ());
        // All rects in this test are more than 2 times smaller than mercator range.
        TEST_LESS(coordBits, kCoordBits, (limitRect));
        for (size_t i = 0; i < pointsPerRect; ++i)
        {
          // Draw x and y in separate statements: the evaluation order of function
          // arguments is unspecified, so drawing both inside one constructor call
          // made the generated points depend on the compiler.
          double const x = distX(rng);
          double const y = distY(rng);
          auto const pt = m2::PointD(x, y);
          auto const pointU = PointDToPointU(pt, coordBits, limitRect);
          auto const pointD = PointUToPointD(pointU, coordBits, limitRect);
          TEST(AlmostEqualAbs(pt, pointD, kEps), (limitRect, pt, pointD, coordBits, kEps));
        }
      }
    }
  }
}
|
||||
|
||||
// Round-trips a few hand-picked points through the obsolete int64 encoding.
UNIT_TEST(PointToInt64Obsolete_Smoke)
{
  m2::PointD const arr[] = {{1.25, 1.3}, {180, 90}, {-180, -90}, {0, 0}};

  for (m2::PointD const & pt : arr)
  {
    int64_t const encoded = PointToInt64Obsolete(pt, kCoordBits);
    CheckEqualPoints(pt, Int64ToPointObsolete(encoded, kCoordBits));
  }
}
|
||||
|
||||
// Round-trips every node of a 5-degree grid over [-180, 180] x [-180, 180] and checks
// that encoding of the decoded point gives the same id again.
UNIT_TEST(PointToInt64Obsolete_Grid)
{
  int const delta = 5;
  int const minCoord = -180;
  int const maxCoord = 180;

  for (int ix = minCoord; ix <= maxCoord; ix += delta)
  {
    for (int iy = minCoord; iy <= maxCoord; iy += delta)
    {
      m2::PointD const pt(ix, iy);

      // First pass: point -> id -> point.
      int64_t const id = PointToInt64Obsolete(pt, kCoordBits);
      m2::PointD const pt1 = Int64ToPointObsolete(id, kCoordBits);
      CheckEqualPoints(pt, pt1);

      // Second pass: the decoded point must be encoded to the same id.
      int64_t const id1 = PointToInt64Obsolete(pt1, kCoordBits);
      TEST_EQUAL(id, id1, (pt, pt1));
    }
  }
}
|
||||
|
||||
// Round-trips points lying slightly inside and slightly outside of notable coordinates.
// The allowed error is the applied offset plus kEps.
UNIT_TEST(PointToInt64Obsolete_Bounds)
{
  double const arrEps[] = {-1.0E-2, -1.0E-3, -1.0E-4, 0, 1.0E-4, 1.0E-3, 1.0E-2};

  m2::PointD const arrPt[] = {{0, 0},     {-180, -180}, {-180, 180}, {180, 180}, {180, -180},
                              {-90, -90}, {-90, 90},    {90, 90},    {90, -90}};

  for (m2::PointD const & base : arrPt)
  {
    for (size_t iX = 0; iX < ARRAY_SIZE(arrEps); ++iX)
    {
      for (size_t iY = 0; iY < ARRAY_SIZE(arrEps); ++iY)
      {
        m2::PointD const pt(base.x + arrEps[iX], base.y + arrEps[iY]);
        int64_t const encoded = PointToInt64Obsolete(pt, kCoordBits);
        m2::PointD const pt1 = Int64ToPointObsolete(encoded, kCoordBits);

        TEST(fabs(pt.x - pt1.x) <= (fabs(arrEps[iX]) + kEps) && fabs(pt.y - pt1.y) <= (fabs(arrEps[iY]) + kEps),
             (pt, pt1));
      }
    }
  }
}
|
||||
|
||||
// The zero point and the zero code must map to each other.
UNIT_TEST(PointUToUint64Obsolete_0)
{
  // Point -> code.
  TEST_EQUAL(0, PointUToUint64Obsolete(m2::PointU(0, 0)), ());

  // Code -> point.
  TEST_EQUAL(m2::PointU(0, 0), Uint64ToPointUObsolete(0), ());
}
|
||||
|
||||
// Checks bit interleaving: according to the expected values below, bits of y go to
// the odd positions of the code (0xAAAA...) and bits of x go to the even ones (0x5555...).
UNIT_TEST(PointUToUint64Obsolete_Interlaced)
{
  // All 32 bits of a single coordinate are set.
  TEST_EQUAL(0xAAAAAAAAAAAAAAAAULL, PointUToUint64Obsolete(m2::PointU(0, 0xFFFFFFFF)), ());
  TEST_EQUAL(0x5555555555555555ULL, PointUToUint64Obsolete(m2::PointU(0xFFFFFFFF, 0)), ());

  // The lowest bit of the coordinate is cleared.
  TEST_EQUAL(0xAAAAAAAAAAAAAAA8ULL, PointUToUint64Obsolete(m2::PointU(0, 0xFFFFFFFE)), ());
  TEST_EQUAL(0x5555555555555554ULL, PointUToUint64Obsolete(m2::PointU(0xFFFFFFFE, 0)), ());
}
|
||||
|
||||
// Checks the codes of points which have a single bit set in one or both coordinates.
UNIT_TEST(PointUToUint64Obsolete_1bit)
{
  // The lowest bit of y.
  TEST_EQUAL(2, PointUToUint64Obsolete(m2::PointU(0, 1)), ());
  TEST_EQUAL(m2::PointU(0, 1), Uint64ToPointUObsolete(2), ());

  // The lowest bit of x.
  TEST_EQUAL(1, PointUToUint64Obsolete(m2::PointU(1, 0)), ());
  TEST_EQUAL(m2::PointU(1, 0), Uint64ToPointUObsolete(1), ());

  // kBig is 1 << 30: bit 30 of both coordinates, and all the bits below it.
  TEST_EQUAL(3ULL << 60, PointUToUint64Obsolete(m2::PointU(kBig, kBig)), ());
  TEST_EQUAL((1ULL << 60) - 1, PointUToUint64Obsolete(m2::PointU(kBig - 1, kBig - 1)), ());
}
|
||||
|
||||
// Round-trips every point of the arr1 data set through the obsolete int64 encoding.
UNIT_TEST(PointToInt64Obsolete_DataSet1)
{
  using namespace geometry_coding_tests;

  for (auto const & sample : arr1)
  {
    m2::PointD const pt(sample.x, sample.y);

    // First pass: point -> id -> point.
    int64_t const id = PointToInt64Obsolete(pt, kCoordBits);
    m2::PointD const pt1 = Int64ToPointObsolete(id, kCoordBits);
    CheckEqualPoints(pt, pt1);

    // Second pass: the decoded point must be encoded to the same id.
    int64_t const id1 = PointToInt64Obsolete(pt1, kCoordBits);
    TEST_EQUAL(id, id1, (pt, pt1));
  }
}
|
||||
49
libs/coding/coding_tests/reader_cache_test.cpp
Normal file
49
libs/coding/coding_tests/reader_cache_test.cpp
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/reader_cache.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
template <class ReaderT>
|
||||
class CacheReader
|
||||
{
|
||||
public:
|
||||
CacheReader(ReaderT const & reader, uint32_t logPageSize, uint32_t logPageCount)
|
||||
: m_Reader(reader)
|
||||
, m_Cache(logPageSize, logPageCount)
|
||||
{}
|
||||
|
||||
void Read(uint64_t pos, void * p, size_t size) const { m_Cache.Read(m_Reader, pos, p, size); }
|
||||
|
||||
private:
|
||||
ReaderT m_Reader;
|
||||
ReaderCache<ReaderT const> mutable m_Cache;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Compares random reads done directly from memory with the same reads done through the cache.
UNIT_TEST(CacheReaderRandomTest)
{
  vector<char> data(100000);
  for (size_t i = 0; i < data.size(); ++i)
    data[i] = static_cast<char>(i % 253);

  MemReader memReader(data.data(), data.size());
  CacheReader<MemReader> cacheReader(MemReader(data.data(), data.size()), 10, 5);

  // Fixed seed keeps the sequence of reads reproducible.
  mt19937 rng(0);
  for (size_t i = 0; i < 100000; ++i)
  {
    size_t pos = rng() % data.size();
    // From 1 to 127 bytes, clamped by the end of the data.
    size_t const wanted = static_cast<size_t>(1 + (rng() % 127));
    size_t len = min(wanted, data.size() - pos);

    string readMem(len, '0');
    string readCache(len, '0');
    memReader.Read(pos, &readMem[0], len);
    cacheReader.Read(pos, &readCache[0], len);

    TEST_EQUAL(readMem, readCache, (pos, len, i));
  }
}
|
||||
122
libs/coding/coding_tests/reader_test.cpp
Normal file
122
libs/coding/coding_tests/reader_test.cpp
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/coding_tests/reader_test.hpp"
|
||||
|
||||
#include "coding/buffer_reader.hpp"
|
||||
#include "coding/file_reader.hpp"
|
||||
#include "coding/file_writer.hpp"
|
||||
#include "coding/reader_streambuf.hpp"
|
||||
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
string const kData("Quick brown fox jumps over a lazy dog...");
|
||||
}
|
||||
|
||||
// Runs the common reader scenario over an in-memory reader.
UNIT_TEST(MemReaderSmokeTest)
{
  MemReader memReader(kData.data(), kData.size());
  TestReader(memReader);
}
|
||||
|
||||
// Runs the common reader scenario over a file reader: kData is written into a temporary
// file, read back and the file is deleted afterwards.
UNIT_TEST(FileReaderSmokeTest)
{
  char const * const fileName = "reader_test_tmp.dat";

  {
    // The scope ends the writer's lifetime before the file is opened for reading.
    FileWriter writer(fileName);
    writer.Write(kData.data(), kData.size());
  }

  {
    FileReader fileReader(fileName);
    TestReader(fileReader);
  }

  FileWriter::DeleteFileX(fileName);
}
|
||||
|
||||
// Runs the common reader scenario over BufferReader created from memory and from a file.
UNIT_TEST(BufferReaderSmokeTest)
{
  BufferReader r1(kData.data(), kData.size());
  TestReader(r1);

  char const * const fileName = "reader_test_tmp.dat";
  {
    // A 7-character prefix is put before kData in the file.
    string const data("BlaBla " + kData);
    FileWriter writer(fileName);
    writer.Write(data.data(), data.size());
  }

  // The second argument 7 equals the prefix length, so the reader is expected to start at kData.
  BufferReader r2(FileReader(fileName), 7);
  TestReader(r2);

  FileWriter::DeleteFileX(fileName);
}
|
||||
|
||||
// A BufferReader created over an empty MemReader must report zero size.
UNIT_TEST(BufferReaderEmptyTest)
{
  // nullptr instead of the NULL macro: type-safe null pointer constant.
  MemReader reader(nullptr, 0);
  BufferReader bufReader(reader, 0);
  TEST_EQUAL(bufReader.Size(), 0, ());
}
|
||||
|
||||
// Opening a file that does not exist must throw FileReader::OpenException.
UNIT_TEST(FileReaderNonExistentFileTest)
{
  try
  {
    FileReader reader("skjhfaxniauiuq2bmnszmn093sklsd");
    // Must not be reached: the constructor above is expected to throw.
    TEST(false, ("Exception should be thrown!"));
  }
  catch (FileReader::OpenException const &)
  {
    // Expected outcome, nothing to do.
  }
}
|
||||
|
||||
// Writes a file whose content equals its own name and reads it back with ReadAsString().
UNIT_TEST(FileReaderReadAsText)
{
  char const fName[] = "zzzuuuuuummmba";

  {
    FileWriter f(fName);
    // The terminating zero is not written.
    f.Write(fName, ARRAY_SIZE(fName) - 1);
  }

  {
    string text;
    FileReader(fName).ReadAsString(text);
    TEST_EQUAL(text, fName, ());
  }

  FileWriter::DeleteFileX(fName);
}
|
||||
|
||||
// Writes several formatted values through WriterStreamBuf and parses them back
// through ReaderStreamBuf.
UNIT_TEST(ReaderStreamBuf)
{
  string const name = "test.txt";

  {
    FileWriter writer(name);
    WriterStreamBuf buffer(writer);
    ostream s(&buffer);
    // endl is kept on purpose: it also flushes the stream.
    s << "hey!" << '\n' << 1 << '\n' << 3.14 << '\n' << 0x0102030405060708ull << endl;
  }

  {
    ReaderStreamBuf buffer(make_unique<FileReader>(name));
    istream s(&buffer);

    // Initialized so that a failed extraction never leaves indeterminate values
    // to be read by the checks below.
    string str;
    int i = 0;
    double d = 0.0;
    unsigned long long ull = 0;

    s >> str >> i >> d >> ull;

    TEST_EQUAL(str, "hey!", ());
    TEST_EQUAL(i, 1, ());
    TEST_ALMOST_EQUAL_ULPS(d, 3.14, ());
    TEST_EQUAL(ull, 0x0102030405060708ull, ());
  }

  FileWriter::DeleteFileX(name);
}
|
||||
51
libs/coding/coding_tests/reader_test.hpp
Normal file
51
libs/coding/coding_tests/reader_test.hpp
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
#pragma once
|
||||
|
||||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace
{
// Resizes |str| to |size| and fills it with |size| bytes read from |reader| at offset |pos|.
template <class ReaderT>
void ReadToStringFromPos(ReaderT const & reader, std::string & str, uint64_t pos, size_t size)
{
  str.resize(size);
  reader.Read(pos, str.data(), size);
}

// Resizes |str| to |size| and fills it with the next |size| bytes read from |source|.
template <class SourceT>
void ReadToStringFromSource(SourceT & source, std::string & str, size_t size)
{
  str.resize(size);
  source.Read(str.data(), size);
}
}  // namespace
|
||||
|
||||
template <typename ReaderT>
|
||||
void TestReader(ReaderT const & reader)
|
||||
{
|
||||
ReaderSource<ReaderT> source(reader);
|
||||
std::string d1;
|
||||
ReadToStringFromSource(source, d1, 6);
|
||||
TEST_EQUAL(d1, "Quick ", ());
|
||||
|
||||
ReadToStringFromSource(source, d1, 6);
|
||||
TEST_EQUAL(d1, "brown ", ());
|
||||
|
||||
ReaderT subReader = source.SubReader(10);
|
||||
ReadToStringFromPos(subReader, d1, 1, 3);
|
||||
TEST_EQUAL(d1, "ox ", ());
|
||||
|
||||
ReaderT subSubReader = subReader.SubReader(2, 8);
|
||||
ReadToStringFromPos(subSubReader, d1, 0, 2);
|
||||
TEST_EQUAL(d1, "x ", ());
|
||||
|
||||
ReadToStringFromSource(source, d1, 5);
|
||||
TEST_EQUAL(d1, "over ", ());
|
||||
|
||||
ReaderSource<ReaderT> subReaderSource(subReader);
|
||||
ReadToStringFromSource(subReaderSource, d1, 6);
|
||||
TEST_EQUAL(d1, "fox ju", ());
|
||||
}
|
||||
153
libs/coding/coding_tests/reader_writer_ops_test.cpp
Normal file
153
libs/coding/coding_tests/reader_writer_ops_test.cpp
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/byte_stream.hpp"
|
||||
#include "coding/file_reader.hpp"
|
||||
#include "coding/file_writer.hpp"
|
||||
#include "coding/read_write_utils.hpp"
|
||||
#include "coding/reader_writer_ops.hpp"
|
||||
|
||||
#include "base/random.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
namespace rw_ops_tests
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
void GetReverseForReaderAndTmpFile(Reader const & src, vector<char> & buffer)
|
||||
{
|
||||
char const * tmpFile = "reversed_file.tmp";
|
||||
|
||||
{
|
||||
FileWriter writer(tmpFile);
|
||||
rw_ops::Reverse(src, writer);
|
||||
}
|
||||
|
||||
{
|
||||
FileReader reader(tmpFile);
|
||||
buffer.clear();
|
||||
MemWriter<vector<char>> writer(buffer);
|
||||
rw_ops::Reverse(reader, writer);
|
||||
}
|
||||
|
||||
FileWriter::DeleteFileX(tmpFile);
|
||||
}
|
||||
|
||||
void FillRandFile(string const & fName, size_t count)
|
||||
{
|
||||
FileWriter writer(fName);
|
||||
|
||||
base::UniformRandom<int8_t> rand;
|
||||
|
||||
while (count-- > 0)
|
||||
{
|
||||
int8_t const c = rand();
|
||||
writer.Write(&c, 1);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Reversing data twice must give the original data, both for a small in-memory array
// and for a file with random content.
UNIT_TEST(Reverse_Smoke)
{
  // Small in-memory array.
  {
    char arr[] = {0xA, 0xB, 0xC, 0xD, 0xF};
    size_t const sz = ARRAY_SIZE(arr);
    MemReader reader(&arr[0], sz);

    vector<char> buffer;
    GetReverseForReaderAndTmpFile(reader, buffer);

    TEST_EQUAL(buffer.size(), ARRAY_SIZE(arr), ());
    TEST(equal(arr, arr + ARRAY_SIZE(arr), buffer.begin()), ());
  }

  // File with random content.
  {
    char const * const tmpFile = "random_file.tmp";

    {
      FillRandFile(tmpFile, 10 * 1024 + 527);
      FileReader reader(tmpFile);

      vector<char> buffer;
      GetReverseForReaderAndTmpFile(reader, buffer);

      string str;
      reader.ReadAsString(str);

      TEST_EQUAL(str.size(), buffer.size(), ());
      TEST(equal(str.begin(), str.end(), buffer.begin()), ());
    }

    // The reader is out of scope here, so the file can be removed.
    FileWriter::DeleteFileX(tmpFile);
  }
}
|
||||
|
||||
namespace
{
// Plain data structure used for checking of the POD vector serialization.
struct ThePOD
{
  uint32_t m_i;
  double m_d;
};

// Member-wise exact comparison.
bool operator==(ThePOD const & r1, ThePOD const & r2)
{
  if (r1.m_i != r2.m_i)
    return false;
  return r1.m_d == r2.m_d;
}
}  // namespace
|
||||
|
||||
// Serializes a vector of random PODs with rw::WriteVectorOfPOD and checks that
// rw::ReadVectorOfPOD restores exactly the same elements.
UNIT_TEST(ReadWrite_POD)
{
  base::UniformRandom<uint32_t> rand;

  size_t const count = 1000;
  vector<ThePOD> src(count);
  for (size_t i = 0; i < count; ++i)
  {
    src[i].m_i = rand();

    // Separate statements fix the order of the random draws, which is unspecified
    // for the operands of a single division.
    double const numerator = static_cast<double>(rand());
    // +1 keeps the denominator non-zero: no inf and, more important, no NaN,
    // which would never compare equal to itself in the check below.
    double const denominator = static_cast<double>(rand()) + 1.0;
    src[i].m_d = numerator / denominator;
  }

  vector<char> buffer;
  PushBackByteSink<vector<char>> sink(buffer);
  rw::WriteVectorOfPOD(sink, src);

  buffer_vector<ThePOD, 128> dest;
  ArrayByteSource byteSrc(buffer.data());
  rw::ReadVectorOfPOD(byteSrc, dest);

  // The sizes are checked first, so equal() never reads beyond the end of |dest|.
  TEST_EQUAL(dest.size(), src.size(), ());
  TEST(equal(src.begin(), src.end(), dest.begin()), ());
}
|
||||
|
||||
namespace
|
||||
{
|
||||
template <class T>
|
||||
void TestIntegral()
|
||||
{
|
||||
std::vector<T> ethalon{static_cast<T>(-1), 0, 1, static_cast<T>(-2), 2, std::numeric_limits<T>::min(),
|
||||
std::numeric_limits<T>::max()};
|
||||
|
||||
std::string buffer;
|
||||
MemWriter writer(buffer);
|
||||
rw::Write(writer, ethalon);
|
||||
|
||||
std::vector<T> expected;
|
||||
MemReader reader(buffer);
|
||||
ReaderSource src(reader);
|
||||
rw::Read(src, expected);
|
||||
|
||||
TEST_EQUAL(ethalon, expected, ());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Runs the integral round-trip check for 32-bit and 64-bit signed and unsigned types.
UNIT_TEST(ReadWrite_Integral)
{
  // 32 bits.
  TestIntegral<uint32_t>();
  TestIntegral<int32_t>();

  // 64 bits.
  TestIntegral<uint64_t>();
  TestIntegral<int64_t>();
}
|
||||
|
||||
} // namespace rw_ops_tests
|
||||
265
libs/coding/coding_tests/serdes_json_test.cpp
Normal file
265
libs/coding/coding_tests/serdes_json_test.cpp
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/serdes_json.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
#include "base/visitor.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <chrono>
|
||||
#include <deque>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Returns true when both pointers are null or when both are non-null and point to equal values.
template <typename Ptr>
bool SamePtrValue(Ptr const & lhs, Ptr const & rhs)
{
  bool const hasLhs = static_cast<bool>(lhs);
  bool const hasRhs = static_cast<bool>(rhs);

  // Exactly one of the pointers is null.
  if (hasLhs != hasRhs)
    return false;

  // Here either both pointers are null or both are valid.
  return !hasLhs || *lhs == *rhs;
}
|
||||
|
||||
template <typename T>
|
||||
bool TestSerDes(T const & value)
|
||||
{
|
||||
string jsonStr;
|
||||
{
|
||||
using Sink = MemWriter<string>;
|
||||
Sink sink(jsonStr);
|
||||
coding::SerializerJson<Sink> ser(sink);
|
||||
ser(value);
|
||||
}
|
||||
|
||||
T deserializedValue;
|
||||
try
|
||||
{
|
||||
coding::DeserializerJson des(jsonStr);
|
||||
des(deserializedValue);
|
||||
}
|
||||
catch (base::Json::Exception const & exception)
|
||||
{
|
||||
LOG(LWARNING, ("Exception while parsing json string, reason:", exception.what(), "json:", jsonStr));
|
||||
return false;
|
||||
}
|
||||
return deserializedValue == value;
|
||||
}
|
||||
|
||||
// Enumeration for the serialization tests. Count is the number of real values.
enum class TestEnum
{
  Value0 = 0,
  Value1 = 1,
  Value2 = 2,
  Count = 3
};
|
||||
|
||||
struct ValueTypes
|
||||
{
|
||||
DECLARE_VISITOR(visitor(m_boolValue, "boolValue"), visitor(m_uint8Value, "uint8Value"),
|
||||
visitor(m_uint32Value, "uint32Value"), visitor(m_uint64Value, "uint64Value"),
|
||||
visitor(m_int8Value, "int8Value"), visitor(m_int32Value, "int32Value"),
|
||||
visitor(m_int64Value, "int64Value"), visitor(m_doubleValue, "doubleValue"),
|
||||
visitor(m_stringValue, "stringValue"), visitor(m_enumValue, "enumValue"),
|
||||
visitor(m_timePointValue, "timePointValue"))
|
||||
|
||||
ValueTypes() = default;
|
||||
ValueTypes(uint32_t testCounter)
|
||||
: m_boolValue(static_cast<bool>(testCounter % 2))
|
||||
, m_uint8Value(numeric_limits<uint8_t>::max() - static_cast<uint8_t>(testCounter))
|
||||
, m_uint32Value(numeric_limits<uint32_t>::max() - testCounter)
|
||||
, m_uint64Value(numeric_limits<uint64_t>::max() - testCounter)
|
||||
, m_int8Value(numeric_limits<int8_t>::min() + static_cast<int8_t>(testCounter))
|
||||
, m_int32Value(numeric_limits<int32_t>::min() + static_cast<int32_t>(testCounter))
|
||||
, m_int64Value(numeric_limits<int64_t>::min() + static_cast<int64_t>(testCounter))
|
||||
, m_doubleValue(numeric_limits<double>::max() - testCounter)
|
||||
, m_stringValue(strings::to_string(testCounter))
|
||||
, m_enumValue(static_cast<TestEnum>(testCounter % static_cast<uint32_t>(TestEnum::Count)))
|
||||
, m_timePointValue(chrono::system_clock::now())
|
||||
{}
|
||||
|
||||
bool operator==(ValueTypes const & rhs) const
|
||||
{
|
||||
return m_boolValue == rhs.m_boolValue && m_uint8Value == rhs.m_uint8Value && m_uint32Value == rhs.m_uint32Value &&
|
||||
m_uint64Value == rhs.m_uint64Value && m_int8Value == rhs.m_int8Value && m_int32Value == rhs.m_int32Value &&
|
||||
m_int64Value == rhs.m_int64Value && m_doubleValue == rhs.m_doubleValue &&
|
||||
m_stringValue == rhs.m_stringValue && m_enumValue == rhs.m_enumValue &&
|
||||
m_timePointValue == rhs.m_timePointValue;
|
||||
}
|
||||
|
||||
bool m_boolValue;
|
||||
uint8_t m_uint8Value;
|
||||
uint32_t m_uint32Value;
|
||||
uint64_t m_uint64Value;
|
||||
int8_t m_int8Value;
|
||||
int32_t m_int32Value;
|
||||
int64_t m_int64Value;
|
||||
double m_doubleValue;
|
||||
string m_stringValue;
|
||||
TestEnum m_enumValue;
|
||||
chrono::system_clock::time_point m_timePointValue;
|
||||
};
|
||||
|
||||
struct ObjectTypes
|
||||
{
|
||||
DECLARE_VISITOR(visitor(m_pointValue, "pointValue"), visitor(m_latLonValue, "latLonValue"),
|
||||
visitor(m_pairValue, "pairValue"))
|
||||
|
||||
ObjectTypes() = default;
|
||||
ObjectTypes(uint32_t testCounter)
|
||||
: m_pointValue(testCounter, testCounter)
|
||||
, m_latLonValue(testCounter, testCounter)
|
||||
, m_pairValue(testCounter, strings::to_string(testCounter))
|
||||
{}
|
||||
|
||||
bool operator==(ObjectTypes const & rhs) const
|
||||
{
|
||||
return m_pointValue == rhs.m_pointValue && m_latLonValue == rhs.m_latLonValue && m_pairValue == rhs.m_pairValue;
|
||||
}
|
||||
|
||||
m2::PointD m_pointValue;
|
||||
ms::LatLon m_latLonValue;
|
||||
pair<uint32_t, string> m_pairValue;
|
||||
};
|
||||
|
||||
struct PointerTypes
|
||||
{
|
||||
DECLARE_VISITOR(visitor(m_uniquePtrValue, "uniquePtrValue"), visitor(m_sharedPtrValue, "sharedPtrValue"))
|
||||
|
||||
PointerTypes() = default;
|
||||
PointerTypes(uint32_t testCounter)
|
||||
{
|
||||
m_uniquePtrValue = make_unique<ValueTypes>(testCounter);
|
||||
m_sharedPtrValue = make_shared<ValueTypes>(testCounter);
|
||||
}
|
||||
|
||||
bool operator==(PointerTypes const & rhs) const
|
||||
{
|
||||
return SamePtrValue(m_uniquePtrValue, rhs.m_uniquePtrValue) && SamePtrValue(m_sharedPtrValue, rhs.m_sharedPtrValue);
|
||||
}
|
||||
|
||||
unique_ptr<ValueTypes> m_uniquePtrValue;
|
||||
shared_ptr<ValueTypes> m_sharedPtrValue;
|
||||
};
|
||||
|
||||
struct ArrayTypes
|
||||
{
|
||||
DECLARE_VISITOR(visitor(m_arrayValue, "arrayValue"), visitor(m_dequeValue, "dequeValue"),
|
||||
visitor(m_vectorValue, "vectorValue"), visitor(m_mapValue, "mapValue"),
|
||||
visitor(m_unorderedSetValue, "unorderedSetValue"))
|
||||
|
||||
ArrayTypes() = default;
|
||||
ArrayTypes(uint32_t testCounter)
|
||||
: m_arrayValue({{testCounter, testCounter + 1, testCounter + 2}})
|
||||
, m_dequeValue({testCounter + 2, testCounter + 1, testCounter})
|
||||
, m_vectorValue({testCounter, testCounter + 2, testCounter + 1})
|
||||
, m_mapValue({{testCounter, testCounter}, {testCounter + 1, testCounter + 1}})
|
||||
, m_unorderedSetValue({testCounter + 2, testCounter, testCounter + 1})
|
||||
{}
|
||||
|
||||
bool operator==(ArrayTypes const & rhs) const
|
||||
{
|
||||
return m_arrayValue == rhs.m_arrayValue && m_dequeValue == rhs.m_dequeValue && m_vectorValue == rhs.m_vectorValue &&
|
||||
m_mapValue == rhs.m_mapValue && m_unorderedSetValue == rhs.m_unorderedSetValue;
|
||||
}
|
||||
|
||||
array<uint32_t, 3> m_arrayValue;
|
||||
deque<uint32_t> m_dequeValue;
|
||||
vector<uint32_t> m_vectorValue;
|
||||
map<uint32_t, uint32_t> m_mapValue;
|
||||
unordered_set<uint32_t> m_unorderedSetValue;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Round-trips the test structures, and different standard containers of them, through JSON.
UNIT_TEST(SerdesJsonTest)
{
  // Every test structure on its own.
  {
    ValueTypes valueTypes(0);
    TEST(TestSerDes(valueTypes), ());

    ObjectTypes objectTypes(0);
    TEST(TestSerDes(objectTypes), ());

    PointerTypes pointersTypes(0);
    TEST(TestSerDes(pointersTypes), ());

    ArrayTypes arrayTypes(0);
    TEST(TestSerDes(arrayTypes), ());
  }

  // Pairs with different kinds of elements.
  {
    pair<string, m2::PointD> testValue = {"test", m2::PointD(1.0, 2.0)};
    TEST(TestSerDes(testValue), ());
  }

  {
    pair<m2::PointD, m2::PointD> testValue = {m2::PointD(1.0, 2.0), m2::PointD(2.0, 3.0)};
    TEST(TestSerDes(testValue), ());
  }

  {
    pair<string, pair<string, string>> testValue = {"test", {"test1", "test2"}};
    TEST(TestSerDes(testValue), ());
  }

  {
    pair<string, ValueTypes> testValue = {"test", ValueTypes(0)};
    TEST(TestSerDes(testValue), ());
  }

  // Array of structures.
  {
    array<ObjectTypes, 2> testValue = {{ObjectTypes(0), ObjectTypes(1)}};
    TEST(TestSerDes(testValue), ());
  }

  // Unordered set with a custom hasher.
  {
    struct Hasher
    {
      size_t operator()(pair<string, string> const & item) const { return m_hasher(item.first + item.second); }

      hash<string> m_hasher;
    };

    unordered_set<pair<string, string>, Hasher> testValue = {{"ab", "ab"}, {"ef", "ef"}, {"cd", "cd"}};
    TEST(TestSerDes(testValue), ());
  }

  // Nested vectors.
  {
    vector<vector<uint32_t>> testValue;
    for (uint32_t i = 0; i < 5; ++i)
      testValue.push_back({i, i, i});
    TEST(TestSerDes(testValue), ());
  }

  // Vectors and maps of structures.
  {
    vector<ValueTypes> valuesVector;
    for (uint32_t i = 0; i < 5; ++i)
      valuesVector.emplace_back(i);
    TEST(TestSerDes(valuesVector), ());
  }

  {
    map<uint32_t, ValueTypes> valuesMap;
    for (uint32_t i = 0; i < 5; ++i)
      valuesMap.emplace(i, ValueTypes(i));
    TEST(TestSerDes(valuesMap), ());
  }

  {
    vector<ObjectTypes> objectsVector;
    for (uint32_t i = 0; i < 5; ++i)
      objectsVector.emplace_back(i);
    TEST(TestSerDes(objectsVector), ());
  }

  {
    map<uint32_t, ObjectTypes> objectsMap;
    for (uint32_t i = 0; i < 5; ++i)
      objectsMap.emplace(i, ObjectTypes(i));
    TEST(TestSerDes(objectsMap), ());
  }
}
|
||||
49
libs/coding/coding_tests/sha1_test.cpp
Normal file
49
libs/coding/coding_tests/sha1_test.cpp
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/sha1.hpp"
|
||||
|
||||
namespace sha1_test
|
||||
{
|
||||
using namespace coding;
|
||||
|
||||
UNIT_TEST(SHA1_Smoke)
|
||||
{
|
||||
char const * bytes[] = {
|
||||
"H",
|
||||
"He",
|
||||
"Hel",
|
||||
"Hell",
|
||||
"Hello",
|
||||
"Hello,",
|
||||
"Hello, ",
|
||||
"Hello, World!",
|
||||
"Organic Maps is the ultimate companion app for travellers, tourists, hikers, and cyclists!",
|
||||
};
|
||||
|
||||
SHA1::Hash encoded[] = {
|
||||
{0x7C, 0xF1, 0x84, 0xF4, 0xC6, 0x7A, 0xD5, 0x82, 0x83, 0xEC,
|
||||
0xB1, 0x93, 0x49, 0x72, 0x0B, 0x0C, 0xAE, 0x75, 0x68, 0x29},
|
||||
{0x53, 0xA4, 0x17, 0x79, 0x6C, 0x77, 0x78, 0x51, 0x00, 0x3B,
|
||||
0x3F, 0x24, 0x31, 0xE8, 0xEE, 0xF5, 0x62, 0x5E, 0xC1, 0x5B},
|
||||
{0xDB, 0xC2, 0xD1, 0xFE, 0xD0, 0xDC, 0x37, 0xA7, 0x0A, 0xEA,
|
||||
0x0F, 0x37, 0x69, 0x58, 0xC8, 0x02, 0xED, 0xDC, 0x05, 0x59},
|
||||
{0xED, 0x10, 0xFE, 0x11, 0x3D, 0xE1, 0xC0, 0xBD, 0xAA, 0xAA,
|
||||
0xF0, 0x9B, 0x88, 0xCD, 0x34, 0x1E, 0xA0, 0xF4, 0x44, 0x28},
|
||||
{0xF7, 0xFF, 0x9E, 0x8B, 0x7B, 0xB2, 0xE0, 0x9B, 0x70, 0x93,
|
||||
0x5A, 0x5D, 0x78, 0x5E, 0x0C, 0xC5, 0xD9, 0xD0, 0xAB, 0xF0},
|
||||
{0x65, 0x61, 0x1E, 0x95, 0x20, 0xE7, 0x68, 0x14, 0x5D, 0xAD,
|
||||
0xAA, 0x1D, 0x10, 0x7F, 0xDD, 0x52, 0x07, 0xE6, 0x30, 0x57},
|
||||
{0xF5, 0x2A, 0xB5, 0x7F, 0xA5, 0x1D, 0xFA, 0x71, 0x45, 0x05,
|
||||
0x29, 0x44, 0x44, 0x46, 0x3A, 0xE5, 0xA0, 0x09, 0xAE, 0x34},
|
||||
{0x0A, 0x0A, 0x9F, 0x2A, 0x67, 0x72, 0x94, 0x25, 0x57, 0xAB,
|
||||
0x53, 0x55, 0xD7, 0x6A, 0xF4, 0x42, 0xF8, 0xF6, 0x5E, 0x01},
|
||||
{0x48, 0xF5, 0x4D, 0x3D, 0x08, 0xD5, 0xC0, 0x57, 0x6B, 0x3A,
|
||||
0xC5, 0x3E, 0xEF, 0x22, 0x4A, 0xB8, 0x46, 0x7B, 0xA2, 0xFC},
|
||||
};
|
||||
|
||||
static_assert(std::size(bytes) == std::size(encoded));
|
||||
|
||||
for (size_t i = 0; i < std::size(bytes); ++i)
|
||||
TEST_EQUAL(SHA1::CalculateForString(bytes[i]), encoded[i], ());
|
||||
}
|
||||
} // namespace sha1_test
|
||||
89
libs/coding/coding_tests/simple_dense_coding_test.cpp
Normal file
89
libs/coding/coding_tests/simple_dense_coding_test.cpp
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/file_writer.hpp"
|
||||
#include "coding/mmap_reader.hpp"
|
||||
#include "coding/simple_dense_coding.hpp"
|
||||
#include "coding/succinct_mapper.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/scope_guard.hpp"
|
||||
|
||||
#include <limits>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace simple_dense_coding_test
|
||||
{
|
||||
using namespace coding;
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Verifies that |coding| reports the same number of elements as |data| and
// returns the same value as |data| at every index.
void TestSDC(vector<uint8_t> const & data, SimpleDenseCoding const & coding)
{
  size_t const expectedSize = data.size();
  TEST_EQUAL(expectedSize, coding.Size(), ());

  for (size_t idx = 0; idx != expectedSize; ++idx)
  {
    TEST_EQUAL(data[idx], coding.Get(idx), ());
  }
}
|
||||
} // namespace
|
||||
|
||||
// Round-trip smoke test: builds a SimpleDenseCoding from the bytes 0..254,
// checks it, freezes it into a temporary file, then maps the file back and
// checks the mapped instance against the same data.
UNIT_TEST(SimpleDenseCoding_Smoke)
{
  // numeric_limits<uint8_t>::max() elements, i.e. the values 0..254.
  size_t const kElementCount = numeric_limits<uint8_t>::max();
  vector<uint8_t> data(kElementCount);
  for (size_t pos = 0; pos != kElementCount; ++pos)
    data[pos] = static_cast<uint8_t>(pos);

  string const kTestFile = "test.tmp";
  // Delete the temporary file when the test scope is left.
  SCOPE_GUARD(cleanup, [&kTestFile] { FileWriter::DeleteFileX(kTestFile); });

  // Stage 1: build in memory, verify, and write to the file.
  {
    SimpleDenseCoding coding(data);
    TestSDC(data, coding);

    FileWriter writer(kTestFile);
    Freeze(coding, writer, "SimpleDenseCoding");
  }

  // Stage 2: map the stored bytes back and verify again.
  {
    MmapReader reader(kTestFile);

    SimpleDenseCoding coding;
    Map(coding, reader.Data(), "SimpleDenseCoding");
    TestSDC(data, coding);
  }
}
|
||||
|
||||
// Checks the compression ratio of SimpleDenseCoding on normally distributed
// bytes: for each tested upper bound the frozen representation must be more
// than 1.8 times smaller than the raw data.
UNIT_TEST(SimpleDenseCoding_Ratio)
{
  size_t constexpr kSize = 1 << 20;

  for (uint8_t const maxValue : {16, 32, 64})
  {
    random_device seedSource;
    mt19937 engine(seedSource());
    // Samples are centred on maxValue / 2 with a standard deviation of 2.
    normal_distribution<> valueDist(maxValue / 2, 2);

    vector<uint8_t> data(kSize);
    for (uint8_t & slot : data)
    {
      // Round to the nearest integer and clamp into [0, maxValue].
      double const sample = round(valueDist(engine));
      double const clamped = sample < 0 ? 0.0 : (sample > maxValue ? static_cast<double>(maxValue) : sample);
      slot = static_cast<uint8_t>(clamped);
    }

    SimpleDenseCoding coding(data);
    TestSDC(data, coding);

    vector<uint8_t> buffer;
    MemWriter writer(buffer);
    Freeze(coding, writer, "");

    // Raw size in bytes divided by the frozen size in bytes.
    double const ratio = data.size() / static_cast<double>(buffer.size());
    LOG(LINFO, (maxValue, ratio));
    TEST_GREATER(ratio, 1.8, ());
  }
}
|
||||
} // namespace simple_dense_coding_test
|
||||
26
libs/coding/coding_tests/sparse_vector_tests.cpp
Normal file
26
libs/coding/coding_tests/sparse_vector_tests.cpp
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/sparse_vector.hpp"
|
||||
|
||||
// Builds a sparse vector from a small array in which zero marks an empty
// slot, then checks the size, the presence flag of every slot and the stored
// value of every non-empty slot.
UNIT_TEST(SparseVector_Smoke)
{
  uint32_t const values[] = {0, 0, 5, 0, 7, 1000, 0, 0, 1, 0};
  uint64_t const total = std::size(values);

  coding::SparseVectorBuilder<uint32_t> builder(total);
  for (uint32_t value : values)
  {
    if (value != 0)
      builder.PushValue(value);
    else
      builder.PushEmpty();
  }

  auto vec = builder.Build();
  TEST_EQUAL(vec.GetSize(), total, ());

  for (size_t pos = 0; pos < total; ++pos)
  {
    bool const expectPresent = values[pos] != 0;
    TEST_EQUAL(vec.Has(pos), expectPresent, ());
    if (expectPresent)
    {
      TEST_EQUAL(vec.Get(pos), values[pos], ());
    }
  }
}
|
||||
263
libs/coding/coding_tests/string_utf8_multilang_tests.cpp
Normal file
263
libs/coding/coding_tests/string_utf8_multilang_tests.cpp
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/string_utf8_multilang.hpp"
|
||||
|
||||
#include "base/control_flow.hpp"
|
||||
|
||||
#include <utf8.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
// One test sample: a language code paired with the text stored for it.
// Both members are plain pointers to C strings; the struct owns nothing.
struct lang_string
{
  // Language code, e.g. "en".
  char const * m_lang;
  // Text associated with m_lang.
  char const * m_str;
};
|
||||
|
||||
// Shared fixture: (language, text) samples with ASCII text plus UTF-8 text
// made of two-, three- and four-byte sequences.
lang_string gArr[] = {
    {"default", "default"},
    {"en", "abcd"},
    // Five Cyrillic characters, two bytes each.
    {"ru", "\xD0\xA0\xD0\xB0\xD1\x88\xD0\xBA\xD0\xB0"},
    // U+20AC (three bytes) followed by U+24B62 (four bytes).
    {"be", "\xE2\x82\xAC\xF0\xA4\xAD\xA2"},
};
|
||||
|
||||
// Adds the |count| samples starting at |arr| to a fresh StringUtf8Multilang and
// checks that each one can be read back: right after its insertion and again
// once all samples are stored. Finally checks that a lookup of the code "xxx",
// which is never added, fails.
void TestMultilangString(lang_string const * arr, size_t count)
{
  StringUtf8Multilang multilang;
  lang_string const * const last = arr + count;

  // Insert every sample and read it back immediately.
  for (lang_string const * item = arr; item != last; ++item)
  {
    string text(item->m_str);
    TEST(utf8::is_valid(text.begin(), text.end()), ());

    multilang.AddString(item->m_lang, text);

    string_view stored;
    TEST(multilang.GetString(item->m_lang, stored), ());
    TEST_EQUAL(text, stored, ());
  }

  // Later insertions must not have disturbed earlier entries.
  for (lang_string const * item = arr; item != last; ++item)
  {
    string_view stored;
    TEST(multilang.GetString(item->m_lang, stored), ());
    TEST_EQUAL(item->m_str, stored, ());
  }

  string_view missing;
  TEST(!multilang.GetString("xxx", missing), ());
}
|
||||
} // namespace
|
||||
|
||||
// Runs the shared add/lookup checks of TestMultilangString over every entry of
// the gArr fixture. TestMultilangString creates its own StringUtf8Multilang, so
// no local instance is needed here.
UNIT_TEST(MultilangString_Smoke)
{
  TestMultilangString(gArr, ARRAY_SIZE(gArr));
}
|
||||
|
||||
// Exercises StringUtf8Multilang::ForEach with two kinds of callbacks: one that
// returns nothing and is expected to see every entry in insertion order, and
// one that returns base::ControlFlow and stops the iteration after three
// entries.
UNIT_TEST(MultilangString_ForEach)
{
  StringUtf8Multilang s;
  for (auto const & item : gArr)
    s.AddString(item.m_lang, item.m_str);

  // Full traversal: entries must match gArr one by one.
  {
    size_t visited = 0;
    s.ForEach([&visited](char lang, string_view utf8s)
    {
      lang_string const & expected = gArr[visited];
      TEST_EQUAL(lang, StringUtf8Multilang::GetLangIndex(expected.m_lang), ());
      TEST_EQUAL(utf8s, expected.m_str, ());
      ++visited;
    });
    TEST_EQUAL(visited, ARRAY_SIZE(gArr), ());
  }

  // Early exit: the callback asks to break once three entries were seen.
  {
    size_t visited = 0;
    vector<string> const expected = {"default", "en", "ru"};
    vector<string> actual;
    s.ForEach([&visited, &actual](char, string_view)
    {
      actual.push_back(gArr[visited].m_lang);
      ++visited;
      return visited == 3 ? base::ControlFlow::Break : base::ControlFlow::Continue;
    });
    TEST_EQUAL(visited, 3, ());
    TEST_EQUAL(actual, expected, ());
  }
}
|
||||
|
||||
// Adding a string for a language code that is already present must replace the
// old text (with a longer or a shorter one) and leave other languages intact.
UNIT_TEST(MultilangString_Unique)
{
  StringUtf8Multilang s;

  // Checks that |code| is present and currently holds |expected|.
  auto const expectString = [&s](int8_t code, string_view expected)
  {
    string_view actual;
    TEST(s.GetString(code, actual), ());
    TEST_EQUAL(actual, expected, ());
  };

  s.AddString(0, "xxx");
  expectString(0, "xxx");

  s.AddString(1, "yyy");
  expectString(1, "yyy");

  // Replace with a longer string.
  s.AddString(0, "xxxxxx");
  expectString(0, "xxxxxx");
  expectString(1, "yyy");

  // Replace with a shorter string.
  s.AddString(0, "x");
  expectString(0, "x");
  expectString(1, "yyy");
}
|
||||
|
||||
// Checks the language tables: the display name returned for "be", the upper
// bound on the number of supported languages, and that the entry at the
// index of "int_name" carries that same code.
UNIT_TEST(MultilangString_LangNames)
{
  // Compare string contents, not just pointers.
  auto const belarusian = StringUtf8Multilang::GetLangIndex("be");
  TEST_EQUAL(string("Беларуская"), StringUtf8Multilang::GetLangNameByCode(belarusian), ());

  auto const & langs = StringUtf8Multilang::GetSupportedLanguages();
  // The logging/testing macros take their arguments by reference, while the limit is a constexpr
  // constant, so a size_t copy of it is passed instead.
  size_t const maxLanguages = static_cast<size_t>(StringUtf8Multilang::kMaxSupportedLanguages);
  TEST_LESS_OR_EQUAL(langs.size(), maxLanguages, ());

  auto const international = StringUtf8Multilang::GetLangIndex("int_name");
  TEST_EQUAL(langs[international].m_code, string("int_name"), ());
}
|
||||
|
||||
// HasString must report exactly the language codes that were added.
UNIT_TEST(MultilangString_HasString)
{
  StringUtf8Multilang s;
  s.AddString(0, "xxx");
  s.AddString(18, "yyy");
  s.AddString(63, "zzz");

  // Codes that were added.
  for (int8_t const present : {0, 18, 63})
  {
    TEST(s.HasString(present), ());
  }

  // Codes that were never added.
  for (int8_t const absent : {1, 32})
  {
    TEST(!s.HasString(absent), ());
  }
}
|
||||
|
||||
/*
|
||||
UNIT_TEST(MultilangString_ForEachLanguage)
|
||||
{
|
||||
using Translations = vector<pair<string, string>>;
|
||||
StringUtf8Multilang s;
|
||||
Translations const scotlandTranslations = {
|
||||
{"be", "Шатландыя"}, {"cs", "Skotsko"}, {"cy", "Yr Alban"}, {"da", "Skotland"},
|
||||
{"de", "Schottland"}, {"eo", "Skotlando"}, {"es", "Escocia"}, {"eu", "Eskozia"},
|
||||
{"fi", "Skotlanti"}, {"fr", "Écosse"}, {"ga", "Albain"}, {"gd", "Alba"},
|
||||
{"hr", "Škotska"}, {"ia", "Scotia"}, {"io", "Skotia"}, {"ja", "スコットランド"},
|
||||
{"ku", "Skotland"}, {"lfn", "Scotland"}, {"nl", "Schotland"}, {"pl", "Szkocja"},
|
||||
{"ru", "Шотландия"}, {"sco", "Scotland"}, {"sk", "Škótsko"}, {"sr", "Шкотска"},
|
||||
{"sv", "Skottland"}, {"tok", "Sukosi"}, {"tzl", "Escot"}, {"uk", "Шотландія"},
|
||||
{"vo", "Skotän"}, {"zh", "苏格兰"}};
|
||||
|
||||
Translations const usedTranslations = {
|
||||
{"be", "Шатландыя"}, {"cs", "Skotsko"}, {"eu", "Eskozia"}, {"zh", "苏格兰"}};
|
||||
|
||||
for (auto const & langAndTranslation : scotlandTranslations)
|
||||
{
|
||||
s.AddString(langAndTranslation.first, langAndTranslation.second);
|
||||
}
|
||||
|
||||
set<string> testAccumulator;
|
||||
vector<string> const preferredLanguages = {"cs", "eu", "be", "zh"};
|
||||
vector<string> const preferredTranslations = {"Skotsko", "Eskozia", "Шатландыя", "苏格兰"};
|
||||
|
||||
auto const fn = [&testAccumulator, &usedTranslations](int8_t code, string const & name) {
|
||||
testAccumulator.insert(name);
|
||||
if (usedTranslations.size() > testAccumulator.size())
|
||||
return base::ControlFlow::Continue;
|
||||
return base::ControlFlow::Break;
|
||||
};
|
||||
|
||||
TEST(s.ForEachLanguage(preferredLanguages, fn), ());
|
||||
TEST_EQUAL(testAccumulator.size(), preferredTranslations.size(), ());
|
||||
|
||||
for (string const & translation : preferredTranslations)
|
||||
{
|
||||
TEST(testAccumulator.find(translation) != testAccumulator.end(), ());
|
||||
}
|
||||
|
||||
testAccumulator.clear();
|
||||
vector<string> const corruptedLanguages = {"Матерный", "Детский", "BirdLanguage"};
|
||||
TEST(!s.ForEachLanguage(corruptedLanguages, fn), ());
|
||||
TEST_EQUAL(testAccumulator.size(), 0, ());
|
||||
}
|
||||
*/
|
||||
|
||||
// RemoveString must drop exactly the requested language codes and keep every
// other entry readable and unchanged.
UNIT_TEST(MultilangString_RemoveString)
{
  // Fills a fresh StringUtf8Multilang with |strings|, removes |codesToRemove|
  // and checks what is left.
  auto testRemove = [](vector<pair<uint8_t, string>> const & strings, set<uint8_t> const & codesToRemove)
  {
    StringUtf8Multilang str;
    for (auto const & entry : strings)
      str.AddString(entry.first, entry.second);

    // Everything that was added is present before the removal.
    string_view fetched;
    for (auto const & entry : strings)
    {
      TEST(str.HasString(entry.first), ());
      TEST(str.GetString(entry.first, fetched), ());
      TEST_EQUAL(fetched, entry.second, ());
    }

    for (auto code : codesToRemove)
      str.RemoveString(code);

    for (auto const & entry : strings)
    {
      bool const wasRemoved = codesToRemove.count(entry.first) != 0;
      if (wasRemoved)
      {
        TEST(!str.HasString(entry.first), ());
        continue;
      }

      TEST(str.HasString(entry.first), ());
      TEST(str.GetString(entry.first, fetched), ());
      TEST_EQUAL(fetched, entry.second, ());
    }

    // No extra languages or other data damage.
    str.ForEach([&](uint8_t lang, auto const &)
    {
      auto const hasSameLang = [&lang](auto const & s) { return s.first == lang; };
      TEST(base::FindIf(strings, hasSameLang) != strings.end(), ());
      TEST(codesToRemove.count(lang) == 0, ());
    });
  };

  vector<pair<uint8_t, string>> strings = {
      {0, "aaa"}, {1, "bbb"}, {2, "ccc"}, {9, "ddd"}, {17, "eee"}, {27, "fff"}, {37, "ggg"},
  };

  testRemove(strings, {0});
  testRemove(strings, {1});
  testRemove(strings, {9, 27});
  testRemove(strings, {37});
  testRemove(strings, {0, 1, 2, 9, 17, 27, 37});
  // Code 39 is not among the added entries.
  testRemove(strings, {39});
}
|
||||
|
||||
// A StringUtf8Multilang rebuilt via FromBuffer from a copy of another
// instance's buffer must contain the same single entry.
UNIT_TEST(MultilangString_Buffers)
{
  StringUtf8Multilang source;
  source.AddString(StringUtf8Multilang::kInternationalCode, "blabla");

  StringUtf8Multilang const restored = StringUtf8Multilang::FromBuffer(std::string(source.GetBuffer()));
  TEST_EQUAL(restored.CountLangs(), 1, ());

  std::string_view text;
  TEST(restored.GetString(StringUtf8Multilang::kInternationalCode, text), ());
  TEST_EQUAL(text, "blabla", ());
}
|
||||
90
libs/coding/coding_tests/succinct_ef_test.cpp
Normal file
90
libs/coding/coding_tests/succinct_ef_test.cpp
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/succinct_mapper.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "3party/succinct/elias_fano_compressed_list.hpp"
|
||||
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
namespace succinct_ef_test
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
// Returns |count| values of type T drawn uniformly from [0, max(T) - 1].
// The generator is seeded from random_device, so results differ between runs.
template <class T>
std::vector<T> GetUniformValues(size_t count)
{
  std::random_device seedSource;
  std::mt19937 engine(seedSource());
  // Use max - 1 because succinct makes val + 1 encoding internals.
  std::uniform_int_distribution<T> dist(0, std::numeric_limits<T>::max() - 1);

  std::vector<T> values(count);
  for (T & value : values)
    value = dist(engine);
  return values;
}
|
||||
|
||||
// Returns |count| values of type T sampled from a normal distribution with the
// given |mean| and a standard deviation of 2. Each sample is rounded to the
// nearest integer and clamped into [0, max(T) - 1].
// The generator is seeded from random_device, so results differ between runs.
template <class T>
std::vector<T> GetNormalValues(size_t count, T mean)
{
  // Use max - 1 because succinct makes val + 1 encoding internals.
  T constexpr kMax = std::numeric_limits<T>::max() - 1;

  std::random_device seedSource;
  std::mt19937 engine(seedSource());
  std::normal_distribution<> dist(mean, 2);

  std::vector<T> values(count);
  for (T & value : values)
  {
    double const sample = std::round(dist(engine));
    double const clamped = sample < 0 ? 0.0 : (sample > kMax ? static_cast<double>(kMax) : sample);
    value = static_cast<T>(clamped);
  }
  return values;
}
|
||||
|
||||
template <class T>
|
||||
double GetCompressionRatio(vector<T> const & data)
|
||||
{
|
||||
succinct::elias_fano_compressed_list efList(data);
|
||||
|
||||
vector<uint8_t> buffer;
|
||||
MemWriter writer(buffer);
|
||||
coding::Freeze(efList, writer, "");
|
||||
|
||||
return data.size() * sizeof(T) / double(buffer.size());
|
||||
}
|
||||
|
||||
// Compares the Elias-Fano list compression ratio on two kinds of input:
// uniformly distributed values must give a ratio below 1, normally
// distributed small values a ratio above 1.
UNIT_TEST(SuccinctEFList_Ratio)
{
  size_t constexpr kCount = 1 << 20;

  // No need to use EFList for generic data.
  {
    auto const uniform16 = GetUniformValues<uint16_t>(kCount);
    double const ratio2 = GetCompressionRatio(uniform16);
    TEST_LESS(ratio2, 1, ());
    LOG(LINFO, ("Uniform ratio 2:", ratio2));

    auto const uniform32 = GetUniformValues<uint32_t>(kCount);
    double const ratio4 = GetCompressionRatio(uniform32);
    TEST_LESS(ratio4, 1, ());
    LOG(LINFO, ("Uniform ratio 4:", ratio4));
  }

  // EF is good for some kind of normal distribution of small values.
  {
    auto const normal16 = GetNormalValues(kCount, uint16_t(128));
    double const ratio2 = GetCompressionRatio(normal16);
    TEST_GREATER(ratio2, 1, ());
    LOG(LINFO, ("Normal ratio 2:", ratio2));

    auto const normal32 = GetNormalValues(kCount, uint32_t(1024));
    double const ratio4 = GetCompressionRatio(normal32);
    TEST_GREATER(ratio4, 1, ());
    LOG(LINFO, ("Normal ratio 4:", ratio4));
  }
}
|
||||
|
||||
} // namespace succinct_ef_test
|
||||
55
libs/coding/coding_tests/succinct_mapper_test.cpp
Normal file
55
libs/coding/coding_tests/succinct_mapper_test.cpp
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/succinct_mapper.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "3party/succinct/mapper.hpp"
|
||||
|
||||
using namespace coding;
|
||||
|
||||
// ReverseMap on a uint64_t is expected to report 8, to produce the
// byte-reversed value, and to leave the source buffer byte-reversed as well.
UNIT_TEST(ReverseMapper_Smoke)
{
  uint64_t const kReversed = 0xefcdab8967452301;

  uint64_t source = 0x0123456789abcdef;
  uint64_t mapped = 0x0;
  uint8_t * const sourceBytes = reinterpret_cast<uint8_t *>(&source);
  TEST_EQUAL(8, ReverseMap(mapped, sourceBytes, "uint64_t"), ());

  // Test that reversed uint64_t was read.
  TEST_EQUAL(kReversed, mapped, ());

  // Test that underlying buffer was modified.
  TEST_EQUAL(kReversed, source, ());
}
|
||||
|
||||
// Freezes a uint64_t into a byte buffer, checks that exactly 8 bytes were
// written, and checks that Map reads the same value back.
UNIT_TEST(Freeze_Smoke)
{
  std::vector<uint8_t> buffer;
  {
    MemWriter<decltype(buffer)> writer(buffer);
    uint64_t const original = 0x0123456789abcdef;
    Freeze(original, writer, "uint64_t");
  }
  TEST_EQUAL(8, buffer.size(), ());

  uint64_t value = 0x0;
  uint8_t const * const frozenBytes = buffer.data();
  TEST_EQUAL(8, Map(value, frozenBytes, "uint64_t"), ());
  TEST_EQUAL(0x0123456789abcdef, value, ());
}
|
||||
|
||||
// Writes a uint64_t with ReverseFreeze, checks that exactly 8 bytes were
// written, and checks that a plain Map of those bytes yields the
// byte-reversed value.
UNIT_TEST(ReverseFreeze_Smoke)
{
  std::vector<uint8_t> buffer;
  {
    MemWriter<decltype(buffer)> writer(buffer);
    uint64_t const original = 0x0123456789abcdef;
    ReverseFreeze(original, writer, "uint64_t");
  }

  TEST_EQUAL(8, buffer.size(), ());

  uint64_t value = 0x0;
  uint8_t const * const frozenBytes = buffer.data();
  TEST_EQUAL(8, Map(value, frozenBytes, "uint64_t"), ());
  TEST_EQUAL(0xefcdab8967452301, value, ());
}
|
||||
193
libs/coding/coding_tests/test_polylines.cpp
Normal file
193
libs/coding/coding_tests/test_polylines.cpp
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
#include "coding/coding_tests/test_polylines.hpp"
|
||||
|
||||
namespace geometry_coding_tests
|
||||
{
|
||||
P arr1[376] = {P(25.624035299999999182, 72.26346513007850092), P(25.624273200000001083, 72.263461698303601111),
|
||||
P(25.624488899999999347, 72.26341365347376211), P(25.624979400000000851, 72.263304218156179104),
|
||||
P(25.626030799999998777, 72.263025101705878228), P(25.629390999999998257, 72.261676817778678128),
|
||||
P(25.630162399999999678, 72.26138836631159279), P(25.631299500000000791, 72.260963603282490908),
|
||||
P(25.63236829999999955, 72.26051310574631259), P(25.63325580000000059, 72.260190152533994024),
|
||||
P(25.633720499999999021, 72.260019906865807116), P(25.634314799999998513, 72.259865485075735592),
|
||||
P(25.634578999999998672, 72.259830215951140531), P(25.635424199999999217, 72.259772832171691448),
|
||||
P(25.635776400000001018, 72.259834791404088605), P(25.638406499999998545, 72.260604806439260983),
|
||||
P(25.639231599999998679, 72.260931765228107793), P(25.639867699999999928, 72.261237563690428942),
|
||||
P(25.640699399999999031, 72.261850499331046649), P(25.643624299999999039, 72.264447578158552687),
|
||||
P(25.644772700000000754, 72.265904403664706024), P(25.645413800000000037, 72.267106341816230497),
|
||||
P(25.646751600000001758, 72.270404536824941033), P(25.64890219999999843, 72.275985791150915816),
|
||||
P(25.649064599999999103, 72.276404165523842948), P(25.650549500000000336, 72.279974564589863917),
|
||||
P(25.651433600000000723, 72.281545386607334081), P(25.652029899999998719, 72.282193025251160634),
|
||||
P(25.652814700000000414, 72.282915237415323872), P(25.654197199999998702, 72.283799562153532747),
|
||||
P(25.656540400000000801, 72.285055792411071707), P(25.658162999999998277, 72.286263412818769325),
|
||||
P(25.661959599999999426, 72.289916920742129491), P(25.663380199999998865, 72.291039561736027963),
|
||||
P(25.665810499999999195, 72.292780588759853799), P(25.6700361000000008, 72.29585629709197292),
|
||||
P(25.670962599999999298, 72.296655718166547899), P(25.672222699999998952, 72.297961211704517837),
|
||||
P(25.673103499999999855, 72.29896171301187735), P(25.674837499999998869, 72.300952077677095531),
|
||||
P(25.676358000000000459, 72.302732468128681376), P(25.678018200000000348, 72.304444228347662715),
|
||||
P(25.680309600000001069, 72.306619426588397914), P(25.682252600000001763, 72.308208994982337003),
|
||||
P(25.685880300000000886, 72.310749482551628375), P(25.6871223999999998, 72.311619291531712861),
|
||||
P(25.689502399999998516, 72.313337574126506979), P(25.689994200000001001, 72.313685586072296019),
|
||||
P(25.691337099999998372, 72.314639003020189989), P(25.694014100000000411, 72.316465930359882464),
|
||||
P(25.696650399999999337, 72.318133963117716689), P(25.697924300000000386, 72.31863598381848135),
|
||||
P(25.699229800000001234, 72.31891418618496914), P(25.700213699999999051, 72.319045273707061483),
|
||||
P(25.703616300000000194, 72.319271576784373678), P(25.707311499999999427, 72.319273484907995453),
|
||||
P(25.715181600000001083, 72.318046763400587906), P(25.72608460000000008, 72.315978426880036523),
|
||||
P(25.728649600000000675, 72.31539857900408208), P(25.730824299999998317, 72.315156452495600092),
|
||||
P(25.732753200000001215, 72.314945427265811873), P(25.736661200000000349, 72.315042353781024076),
|
||||
P(25.74480259999999987, 72.315568583243575063), P(25.747831600000001373, 72.315649864883624787),
|
||||
P(25.749809599999998966, 72.315866807206518274), P(25.752535200000000515, 72.316023647210727177),
|
||||
P(25.755610000000000781, 72.315910501039496694), P(25.760463999999998919, 72.315272459413776573),
|
||||
P(25.762314700000001011, 72.315021747344800929), P(25.763456399999999036, 72.314812630534717641),
|
||||
P(25.763716200000001066, 72.31478954377344337), P(25.771413500000001306, 72.314102668549878672),
|
||||
P(25.779617200000000565, 72.313375160856324442), P(25.784148800000000534, 72.313357035273327256),
|
||||
P(25.790238899999998523, 72.313577786126856495), P(25.793676300000001334, 72.313716876708198811),
|
||||
P(25.796280599999999339, 72.314048100429985766), P(25.798680499999999682, 72.31463614103191162),
|
||||
P(25.800190700000001698, 72.315239260045032665), P(25.803071100000000371, 72.316310615756250968),
|
||||
P(25.806439499999999754, 72.316835901112042961), P(25.809219599999998707, 72.316657116642062419),
|
||||
P(25.813906700000000427, 72.315918133153061831), P(25.817769800000000657, 72.31543750249576874),
|
||||
P(25.819804099999998925, 72.315482531661231747), P(25.823219200000000484, 72.315995217547779816),
|
||||
P(25.824360999999999677, 72.316092908788874638), P(25.825752500000000111, 72.316000750836963107),
|
||||
P(25.833053499999998337, 72.315183355397863352), P(25.835087900000001326, 72.314863574077250519),
|
||||
P(25.836477299999998536, 72.314986830897922232), P(25.838510800000001666, 72.315843910886087542),
|
||||
P(25.84021669999999915, 72.316586137240363996), P(25.845591399999999993, 72.318366369042564656),
|
||||
P(25.847287900000001315, 72.318912278071522337), P(25.852937300000000675, 72.321233538069833457),
|
||||
P(25.857534099999998745, 72.324114950429262194), P(25.858493899999999144, 72.324638770105451613),
|
||||
P(25.859516599999999187, 72.325101910243901671), P(25.860960299999998568, 72.325309341574609334),
|
||||
P(25.864481800000000078, 72.325170990340012622), P(25.866295099999998541, 72.325066225249685203),
|
||||
P(25.871619400000000155, 72.324758609934391984), P(25.873917800000000966, 72.324524655307570242),
|
||||
P(25.875719000000000136, 72.324229064532204347), P(25.882352300000000866, 72.322516991669758113),
|
||||
P(25.886094899999999797, 72.321551632301222412), P(25.891463999999999146, 72.320154280548763381),
|
||||
P(25.892594599999998906, 72.32000410941930113), P(25.893775399999999109, 72.320041127430243932),
|
||||
P(25.895055100000000436, 72.320205228136387632), P(25.901716900000000265, 72.321479884460799781),
|
||||
P(25.905201399999999268, 72.322148897878847151), P(25.906758400000001075, 72.322300409542663147),
|
||||
P(25.908453200000000294, 72.322276366107203671), P(25.910453700000001476, 72.322039939449879853),
|
||||
P(25.912611200000000622, 72.321379323121732341), P(25.914446699999999169, 72.320507670602822259),
|
||||
P(25.915890699999998503, 72.319578403757603269), P(25.916971199999998987, 72.318721085380474278),
|
||||
P(25.923277999999999821, 72.312682767056259081), P(25.924315100000001166, 72.311643903530907096),
|
||||
P(25.925479700000000349, 72.310661910829537646), P(25.926380200000000542, 72.31012846985993292),
|
||||
P(25.927288000000000778, 72.309673827336439444), P(25.929170299999999116, 72.308742039167825055),
|
||||
P(25.931695000000001272, 72.307558244187632113), P(25.935542200000000435, 72.305689970006980616),
|
||||
P(25.936291600000000557, 72.305420216334297834), P(25.937011699999999337, 72.3052109385934898),
|
||||
P(25.937444899999999137, 72.305171830245583919), P(25.938065999999999178, 72.305126426436075349),
|
||||
P(25.939194700000001603, 72.305346959512363014), P(25.941637199999998842, 72.306187700803491225),
|
||||
P(25.951531899999999098, 72.309363611414866568), P(25.958591599999998323, 72.311600021678131611),
|
||||
P(25.961859900000000323, 72.312588133461261464), P(25.9623209000000017, 72.312845323461488078),
|
||||
P(25.962808800000001241, 72.313126745396871797), P(25.963783500000001681, 72.313929806056449934),
|
||||
P(25.964454100000001091, 72.315054565005411291), P(25.966293799999998981, 72.319575350745964215),
|
||||
P(25.966609900000001687, 72.320173934482440359), P(25.966938999999999993, 72.320628647970096381),
|
||||
P(25.968776200000000642, 72.322731857094510133), P(25.969766299999999859, 72.323772036806516894),
|
||||
P(25.97039970000000153, 72.324406914991570261), P(25.971057800000000526, 72.324904784282267656),
|
||||
P(25.972805199999999815, 72.325716763759459127), P(25.973508700000000005, 72.326106631888762877),
|
||||
P(25.974174900000001287, 72.326699167072590058), P(25.974623600000001034, 72.327462886785923502),
|
||||
P(25.97499170000000035, 72.32822527930542833), P(25.975826399999998984, 72.329784823533856297),
|
||||
P(25.976481499999998448, 72.330935420885211329), P(25.977230399999999833, 72.332212952428704966),
|
||||
P(25.978115400000000079, 72.333512265445278899), P(25.9789551000000003, 72.33474671239962106),
|
||||
P(25.980276700000001, 72.336402410819303554), P(25.98169719999999927, 72.337880836033434662),
|
||||
P(25.983172299999999666, 72.33911288186702393), P(25.984414600000000917, 72.340068567971513858),
|
||||
P(25.985398499999998734, 72.340636603533639004), P(25.986058100000001048, 72.340908025445514795),
|
||||
P(25.987230000000000274, 72.341316496490946975), P(25.988157300000001015, 72.341676869267246275),
|
||||
P(25.991148400000000152, 72.342299318530393748), P(25.997876999999999015, 72.343701138883602653),
|
||||
P(25.999752600000000768, 72.344154484369809666), P(26.001479700000000861, 72.344723890629211382),
|
||||
P(26.003023999999999916, 72.345420432028205937), P(26.005314899999998346, 72.346859159309715892),
|
||||
P(26.007066099999999409, 72.348322733682408625), P(26.008686999999998335, 72.35014618535842601),
|
||||
P(26.012360000000001037, 72.354910262506038521), P(26.013286199999999582, 72.355943685106993257),
|
||||
P(26.013858500000001328, 72.35652369166834319), P(26.014633599999999802, 72.357135968669368253),
|
||||
P(26.015746700000001113, 72.357673410043958029), P(26.017126499999999822, 72.358212001250265644),
|
||||
P(26.020520199999999988, 72.359278695677289761), P(26.021437599999998724, 72.359644892510004865),
|
||||
P(26.022532699999999295, 72.360275718006846546), P(26.028545999999998628, 72.365263533617877556),
|
||||
P(26.029226600000001213, 72.365797602942478761), P(26.030111600000001459, 72.366317546512846093),
|
||||
P(26.032004199999999372, 72.367306080501194288), P(26.033209299999999331, 72.367834246590078351),
|
||||
P(26.034265699999998844, 72.368067397148493569), P(26.035592099999998794, 72.368224167962054594),
|
||||
P(26.03677019999999942, 72.368129074294643033), P(26.043432299999999202, 72.366408627750374194),
|
||||
P(26.045431499999999403, 72.365842856777021552), P(26.048415399999999664, 72.36504242213915461),
|
||||
P(26.052753299999999115, 72.363920454888528866), P(26.05556269999999941, 72.363008918012667436),
|
||||
P(26.060303699999998628, 72.360393712052541559), P(26.065962500000001256, 72.35698705139280662),
|
||||
P(26.067612400000001571, 72.356026924714299753), P(26.069255399999999412, 72.355021374242639354),
|
||||
P(26.070335599999999943, 72.354163985856629893), P(26.071483900000000489, 72.353231772141796796),
|
||||
P(26.073087300000000965, 72.351530224288538307), P(26.07495580000000146, 72.349052146600300262),
|
||||
P(26.077375199999998756, 72.345412414793742073), P(26.079008800000000434, 72.34322240936705839),
|
||||
P(26.080636800000000619, 72.341554327036718064), P(26.081818800000000635, 72.340620379333103074),
|
||||
P(26.083176200000000478, 72.339615440891947173), P(26.085581000000001239, 72.338285853103528211),
|
||||
P(26.092078799999999461, 72.335142167729841844), P(26.099516500000000008, 72.332061609286498083),
|
||||
P(26.102282500000001164, 72.330882175026999903), P(26.105014700000001682, 72.329521843521945357),
|
||||
P(26.108211900000000583, 72.327720133658942814), P(26.116759299999998234, 72.322424061632020198),
|
||||
P(26.118289900000000614, 72.321345929920937579), P(26.124188000000000187, 72.316306990481081129),
|
||||
P(26.126093300000000852, 72.314456217615472156), P(26.13131840000000139, 72.308768748722727082),
|
||||
P(26.133807300000000851, 72.305896196846916268), P(26.135103199999999646, 72.304208818196542552),
|
||||
P(26.13615610000000089, 72.3027141546473473), P(26.136958199999998698, 72.301545345164157652),
|
||||
P(26.137658200000000619, 72.300474224549915903), P(26.140487000000000251, 72.29551524417688313),
|
||||
P(26.146685800000000199, 72.285760107870132174), P(26.151274499999999534, 72.277504651282583836),
|
||||
P(26.151979099999998368, 72.276113553331668982), P(26.152562700000000717, 72.274582520714972134),
|
||||
P(26.152978600000000853, 72.272986691312326002), P(26.154697899999998612, 72.264608683472175699),
|
||||
P(26.155105599999998844, 72.263003939235275652), P(26.155811400000001044, 72.261258344309723611),
|
||||
P(26.156706599999999696, 72.259655777039213831), P(26.158511799999999425, 72.257073180827120495),
|
||||
P(26.163497199999998344, 72.251147710512896083), P(26.164152500000000146, 72.250452144382251163),
|
||||
P(26.165397099999999853, 72.249370018656591697), P(26.171159400000000517, 72.245101348184562084),
|
||||
P(26.171824600000000771, 72.244502288299599968), P(26.172791700000001214, 72.243464858038208831),
|
||||
P(26.173422299999998586, 72.24251111483852128), P(26.174280599999999453, 72.240982180618559028),
|
||||
P(26.174924399999998315, 72.239409446329290176), P(26.175138900000000319, 72.238550480576279256),
|
||||
P(26.177894599999998348, 72.222417606854094174), P(26.178249600000000896, 72.220799387733251251),
|
||||
P(26.178700899999999052, 72.219414415122045625), P(26.179689899999999625, 72.217234222262234766),
|
||||
P(26.182073200000001378, 72.213506738076645775), P(26.18310470000000123, 72.211533626956168064),
|
||||
P(26.183614800000000855, 72.210338776927230242), P(26.18428000000000111, 72.208417574177602205),
|
||||
P(26.185804499999999706, 72.203266316303412964), P(26.186153000000000901, 72.202346286216979365),
|
||||
P(26.186549599999999316, 72.201465316811109574), P(26.187059699999998941, 72.200685882789031211),
|
||||
P(26.187643699999998859, 72.200064170625580573), P(26.188815999999999207, 72.199110470754774838),
|
||||
P(26.189986799999999789, 72.198491439723213148), P(26.190943999999998226, 72.198205925482497491),
|
||||
P(26.192045499999998981, 72.198064597333782899), P(26.201502200000000187, 72.19749033573828001),
|
||||
P(26.204289599999999183, 72.197194731015855496), P(26.212046699999998367, 72.196023752898682346),
|
||||
P(26.217400099999998986, 72.195033541852339454), P(26.220660899999998605, 72.194099530393685882),
|
||||
P(26.223864100000000121, 72.193042117073559893), P(26.227025699999998665, 72.192404096537160285),
|
||||
P(26.229406099999998503, 72.192154413131575552), P(26.23379059999999896, 72.191934250652863625),
|
||||
P(26.241092200000000645, 72.191652763688111349), P(26.247795599999999894, 72.191305763109099303),
|
||||
P(26.259740499999999486, 72.190710990755292187), P(26.262441899999998896, 72.190662426481935654),
|
||||
P(26.26396259999999927, 72.190803739092231694), P(26.265582200000000768, 72.19108065172507338),
|
||||
P(26.271514700000000886, 72.192273445913514252), P(26.275603900000000124, 72.192994312937273094),
|
||||
P(26.278289999999998372, 72.193506828374651718), P(26.280647800000000558, 72.193799369593079973),
|
||||
P(26.284991699999999071, 72.194193426147350579), P(26.295021899999998283, 72.194996021158502231),
|
||||
P(26.296629599999999272, 72.195353135208762296), P(26.298219400000000689, 72.195936520796209379),
|
||||
P(26.299353599999999886, 72.196573622487093758), P(26.300700500000001369, 72.19746290844136638),
|
||||
P(26.301440499999998224, 72.198127833072547332), P(26.302059899999999715, 72.198747051231549676),
|
||||
P(26.302597999999999701, 72.199118470577644757), P(26.30326700000000173, 72.200164931796578571),
|
||||
P(26.304018299999999186, 72.201524555689601925), P(26.305375600000001413, 72.20513574950004454),
|
||||
P(26.306215500000000418, 72.206942181028665573), P(26.307179600000001329, 72.208595118825385839),
|
||||
P(26.307805599999998236, 72.209443034325843769), P(26.308593200000000678, 72.210334966852684602),
|
||||
P(26.309511400000001657, 72.211171854914510959), P(26.310345000000001647, 72.211829485157878139),
|
||||
P(26.313103999999999161, 72.213550746524816759), P(26.313808999999999116, 72.214105903186023738),
|
||||
P(26.315858999999999668, 72.21616368063173752), P(26.316473599999998356, 72.216713905276705532),
|
||||
P(26.317261800000000704, 72.217105619191144683), P(26.318279199999999207, 72.217451609641841515),
|
||||
P(26.31951039999999864, 72.217778930438797147), P(26.319995200000001034, 72.217883719155963718),
|
||||
P(26.322028199999998321, 72.21814340535271981), P(26.323134799999998279, 72.218219615725388394),
|
||||
P(26.324022500000001656, 72.218280774611798734), P(26.32581220000000144, 72.218525220186265301),
|
||||
P(26.327261700000001099, 72.218861882068196678), P(26.330273800000000506, 72.219715642811124212),
|
||||
P(26.337171999999998917, 72.221928497785057743), P(26.339137900000000769, 72.222394361231621929),
|
||||
P(26.341438799999998821, 72.222689314479467271), P(26.343669200000000785, 72.222811640430336411),
|
||||
P(26.346788899999999956, 72.222677310542948703), P(26.356923500000000615, 72.222042438730937874),
|
||||
P(26.359536099999999692, 72.2221015051835451), P(26.36183730000000125, 72.222299854521224916),
|
||||
P(26.366428899999998947, 72.222842507761527031), P(26.374883000000000521, 72.223912965077033732),
|
||||
P(26.380090800000001394, 72.224542709845593436), P(26.39073850000000121, 72.225869670908153353),
|
||||
P(26.393878699999998361, 72.226187124115313054), P(26.400813700000000495, 72.226887965488728582),
|
||||
P(26.405969100000000083, 72.227408932782296347), P(26.434136200000001082, 72.23031015029567925),
|
||||
P(26.437651200000001239, 72.230672215773722655), P(26.439650799999999009, 72.230860300030158783),
|
||||
P(26.442400500000001529, 72.230918230849241013), P(26.444426599999999894, 72.230815518016711962),
|
||||
P(26.454957100000001446, 72.229639190945519545), P(26.455386699999998257, 72.229609273288744475),
|
||||
P(26.470600499999999755, 72.227804710557407475), P(26.485397899999998828, 72.226080035891357056),
|
||||
P(26.487313600000000235, 72.226084418502168205), P(26.488673999999999609, 72.226209799401686951),
|
||||
P(26.489974300000000085, 72.226456941463752059), P(26.493316499999998825, 72.227405883949458598),
|
||||
P(26.497907399999999001, 72.228727947008763977), P(26.507186099999998419, 72.231355762593423719),
|
||||
P(26.521764000000001005, 72.235531322949142918), P(26.522283200000000392, 72.235663963313356817),
|
||||
P(26.52274799999999999, 72.235808991367022713), P(26.523495799999999178, 72.236006428221017472),
|
||||
P(26.537509100000001183, 72.239985971537208798), P(26.540924100000001573, 72.240959309764491536),
|
||||
P(26.544420699999999869, 72.241674408812258434), P(26.546888100000000321, 72.242183101965366632),
|
||||
P(26.5518616999999999, 72.242874580127462991), P(26.562219100000000083, 72.244128903051048951),
|
||||
P(26.564274399999998622, 72.244315309516480283), P(26.576127799999998302, 72.245028538203385438),
|
||||
P(26.58263820000000166, 72.244424904560787581), P(26.591367999999999228, 72.243389190867901561),
|
||||
P(26.598972199999998622, 72.242452221067154028), P(26.600826200000000199, 72.242522931717928714),
|
||||
P(26.603627199999998254, 72.242683603364909573), P(26.606756300000000692, 72.243241096929352807),
|
||||
P(26.612569100000001754, 72.244800578667096147), P(26.615042299999998932, 72.246052459623328446),
|
||||
P(26.621848599999999863, 72.249011664844303482), P(26.627471299999999843, 72.250195383365820589),
|
||||
P(26.641823800000000944, 72.252710806698729584), P(26.648778100000001245, 72.254338371527666141),
|
||||
P(26.655288500000001051, 72.25700169234383452), P(26.660515000000000185, 72.259171735257126556),
|
||||
P(26.662390800000000723, 72.25996099777080417), P(26.670629300000001649, 72.263625851730935779),
|
||||
P(26.671595899999999801, 72.264267979553508781), P(26.676856199999999575, 72.267335711577246116),
|
||||
P(26.677412499999999085, 72.267929636079472289), P(26.676856199999999575, 72.267335711577246116)};
|
||||
} // namespace geometry_coding_tests
|
||||
9
libs/coding/coding_tests/test_polylines.hpp
Normal file
9
libs/coding/coding_tests/test_polylines.hpp
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
|
||||
// Declarations of polyline fixtures shared by the geometry coding tests.
namespace geometry_coding_tests
|
||||
{
|
||||
// Shorthand for the point type used by the fixtures.
using P = m2::PointD;
|
||||
// Fixture polyline of 376 points; only declared here, the definition lives
// in another translation unit.
extern P arr1[376];
|
||||
} // namespace geometry_coding_tests
|
||||
132
libs/coding/coding_tests/text_storage_tests.cpp
Normal file
132
libs/coding/coding_tests/text_storage_tests.cpp
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/text_storage.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace coding;
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Builds a random byte string using |engine|: first draws a length uniformly
// from [0, 400], then draws every byte uniformly from [0, 255].
template <typename Engine>
std::string GenerateRandomString(Engine & engine)
{
  int const kShortest = 0;
  int const kLongest = 400;
  int const kLowestByte = 0;
  int const kHighestByte = 255;

  std::uniform_int_distribution<int> lengthDist(kShortest, kLongest);
  std::uniform_int_distribution<int> byteDist(kLowestByte, kHighestByte);

  // The length is drawn before any byte so the sequence of engine calls is fixed.
  std::string result(lengthDist(engine), '\0');
  for (size_t i = 0; i < result.size(); ++i)
    result[i] = static_cast<char>(byteDist(engine));
  return result;
}
|
||||
|
||||
// Appends every element of |strings|, in order, to a blocked text storage
// writer configured with |blockSize| and backed by |buffer|.
void DumpStrings(vector<string> const & strings, uint64_t blockSize, vector<uint8_t> & buffer)
{
  using Sink = MemWriter<vector<uint8_t>>;

  Sink sink(buffer);
  BlockedTextStorageWriter<Sink> storage(sink, blockSize);
  for (size_t i = 0; i < strings.size(); ++i)
    storage.Append(strings[i]);
}
|
||||
|
||||
// An empty storage must round-trip: both the bare index and the full storage
// report zero strings.
UNIT_TEST(TextStorage_Smoke)
{
  vector<string> const noStrings;
  vector<uint8_t> serialized;
  DumpStrings(noStrings, 10 /* blockSize */, serialized);

  // Index only.
  {
    MemReader indexReader(serialized.data(), serialized.size());
    BlockedTextStorageIndex storageIndex;
    storageIndex.Read(indexReader);
    TEST_EQUAL(storageIndex.GetNumStrings(), 0, ());
    TEST_EQUAL(storageIndex.GetNumBlockInfos(), 0, ());
  }

  // Full storage.
  {
    MemReader storageReader(serialized.data(), serialized.size());
    BlockedTextStorage<MemReader> storage(storageReader);
    TEST_EQUAL(storage.GetNumStrings(), 0, ());
  }
}
|
||||
|
||||
// Round-trips a handful of short strings (including an empty one) through a
// blocked text storage with block size 10 and checks the index and the content.
UNIT_TEST(TextStorage_Simple)
{
  // Single braces on purpose: a nested `{{...}}` list is only accepted through
  // a fallback overload and would silently select std::string's iterator-range
  // constructor (undefined behaviour) if it ever held exactly two literals.
  vector<string> const strings = {"", "Hello", "Hello, World!", "Hola mundo", "Smoke test"};

  vector<uint8_t> buffer;
  DumpStrings(strings, 10 /* blockSize */, buffer);

  // The index alone knows the number of strings and blocks.
  {
    MemReader reader(buffer.data(), buffer.size());
    BlockedTextStorageIndex index;
    index.Read(reader);
    TEST_EQUAL(index.GetNumStrings(), strings.size(), ());
    TEST_EQUAL(index.GetNumBlockInfos(), 3, ());
  }

  // Every string must be extractable by its position.
  {
    MemReader reader(buffer.data(), buffer.size());
    BlockedTextStorage<decltype(reader)> ts(reader);
    TEST_EQUAL(ts.GetNumStrings(), strings.size(), ());
    for (size_t i = 0; i < ts.GetNumStrings(); ++i)
      TEST_EQUAL(ts.ExtractString(i), strings[i], ());
  }
}
|
||||
|
||||
// Stresses storage of mostly empty strings: 1000 groups, each made of one
// single-byte string followed by 1000 empty strings, stored with block size 5.
UNIT_TEST(TextStorage_Empty)
{
  int const kNumGroups = 1000;
  size_t const kEmptyPerGroup = 1000;

  vector<string> strings;
  for (int group = 0; group < kNumGroups; ++group)
  {
    strings.emplace_back(1 /* size */, static_cast<char>(group % 256));
    // Default-constructed (empty) strings pad out the rest of the group.
    strings.resize(strings.size() + kEmptyPerGroup);
  }

  vector<uint8_t> serialized;
  DumpStrings(strings, 5 /* blockSize */, serialized);

  {
    MemReader reader(serialized.data(), serialized.size());
    BlockedTextStorage<MemReader> storage(reader);
    TEST_EQUAL(storage.GetNumStrings(), strings.size(), ());
    for (size_t pos = 0; pos < storage.GetNumStrings(); ++pos)
      TEST_EQUAL(storage.ExtractString(pos), strings[pos], ());
  }
}
|
||||
|
||||
// Round-trips 1000 pseudo-random strings (fixed seed) and extracts them first
// in forward and then in backward order.
UNIT_TEST(TextStorage_Random)
{
  int const kSeed = 42;
  int const kNumStrings = 1000;
  int const kBlockSize = 100;

  mt19937 engine(kSeed);

  vector<string> strings;
  for (int n = 0; n < kNumStrings; ++n)
    strings.push_back(GenerateRandomString(engine));

  vector<uint8_t> serialized;
  DumpStrings(strings, kBlockSize, serialized);

  MemReader reader(serialized.data(), serialized.size());
  BlockedTextStorage<MemReader> storage(reader);

  TEST_EQUAL(storage.GetNumStrings(), strings.size(), ());

  // Forward pass.
  for (size_t pos = 0; pos < storage.GetNumStrings(); ++pos)
    TEST_EQUAL(storage.ExtractString(pos), strings[pos], ());

  // Backward pass: |next| is one past the position being checked, which keeps
  // the unsigned counter from wrapping.
  for (size_t next = storage.GetNumStrings(); next > 0; --next)
    TEST_EQUAL(storage.ExtractString(next - 1), strings[next - 1], ());
}
|
||||
} // namespace
|
||||
157
libs/coding/coding_tests/traffic_test.cpp
Normal file
157
libs/coding/coding_tests/traffic_test.cpp
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/traffic.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
#include "geometry/point2d.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/math.hpp"
|
||||
|
||||
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>
|
||||
|
||||
namespace traffic_test
|
||||
{
|
||||
using coding::TrafficGPSEncoder;
|
||||
using std::vector;
|
||||
|
||||
// Sums mercator::DistanceOnEarth over every pair of consecutive points of
// |path|. Returns 0 for paths with fewer than two points.
double CalculateLength(vector<TrafficGPSEncoder::DataPoint> const & path)
{
  double total = 0;
  for (size_t i = 0; i + 1 < path.size(); ++i)
  {
    auto const & from = path[i].m_latLon;
    auto const & to = path[i + 1].m_latLon;
    auto const fromPt = mercator::FromLatLon(from.m_lat, from.m_lon);
    auto const toPt = mercator::FromLatLon(to.m_lat, to.m_lon);
    total += mercator::DistanceOnEarth(fromPt, toPt);
  }
  return total;
}
|
||||
|
||||
// Serializes |points| with every encoder version in [0, kLatestVersion],
// deserializes the result and checks that timestamps match exactly and that
// coordinates match within kEps. For the latest version it also logs the path
// length, point count and encoded size.
void Test(vector<TrafficGPSEncoder::DataPoint> & points)
{
  double constexpr kEps = 1e-5;

  for (uint32_t version = 0; version <= TrafficGPSEncoder::kLatestVersion; ++version)
  {
    vector<uint8_t> encoded;
    MemWriter<vector<uint8_t>> writer(encoded);
    UNUSED_VALUE(TrafficGPSEncoder::SerializeDataPoints(version, writer, points));

    vector<TrafficGPSEncoder::DataPoint> decoded;
    MemReader reader(encoded.data(), encoded.size());
    ReaderSource<MemReader> source(reader);
    TrafficGPSEncoder::DeserializeDataPoints(version, source, decoded);

    TEST_EQUAL(points.size(), decoded.size(), ());
    for (size_t i = 0; i < points.size(); ++i)
    {
      auto const & expected = points[i];
      auto const & actual = decoded[i];

      TEST_EQUAL(expected.m_timestamp, actual.m_timestamp, (expected.m_timestamp, actual.m_timestamp));
      TEST(AlmostEqualAbsOrRel(expected.m_latLon.m_lat, actual.m_latLon.m_lat, kEps),
           (expected.m_latLon.m_lat, actual.m_latLon.m_lat));
      TEST(AlmostEqualAbsOrRel(expected.m_latLon.m_lon, actual.m_latLon.m_lon, kEps),
           (expected.m_latLon.m_lon, actual.m_latLon.m_lon));
    }

    // Statistics are reported once, for the newest format only.
    if (version != TrafficGPSEncoder::kLatestVersion)
      continue;

    LOG(LINFO, ("path length =", CalculateLength(points), "num points =", points.size(),
                "compressed size =", encoded.size()));
  }
}
|
||||
|
||||
// Minimal round trip: two points that differ in coordinates and in the third
// DataPoint field.
UNIT_TEST(Traffic_Serialization_Smoke)
{
  using DataPoint = TrafficGPSEncoder::DataPoint;

  vector<DataPoint> data;
  data.push_back(DataPoint(0, ms::LatLon(0.0, 1.0), 1));
  data.push_back(DataPoint(0, ms::LatLon(0.0, 2.0), 2));
  Test(data);
}
|
||||
|
||||
// An empty path must survive the round trip for every encoder version.
UNIT_TEST(Traffic_Serialization_EmptyPath)
{
  vector<TrafficGPSEncoder::DataPoint> noPoints;
  Test(noPoints);
}
|
||||
|
||||
// Two points whose second LatLon argument differs by 1e-3.
UNIT_TEST(Traffic_Serialization_StraightLine100m)
{
  using DataPoint = TrafficGPSEncoder::DataPoint;

  vector<DataPoint> path;
  path.push_back(DataPoint(0, ms::LatLon(0.0, 0.0), 1));
  path.push_back(DataPoint(0, ms::LatLon(0.0, 1e-3), 2));
  Test(path);
}
|
||||
|
||||
// Two points whose second LatLon argument differs by 0.5.
UNIT_TEST(Traffic_Serialization_StraightLine50Km)
{
  using DataPoint = TrafficGPSEncoder::DataPoint;

  vector<DataPoint> path;
  path.push_back(DataPoint(0, ms::LatLon(0.0, 0.0), 1));
  path.push_back(DataPoint(0, ms::LatLon(0.0, 0.5), 2));
  Test(path);
}
|
||||
|
||||
// Five points advancing by 1e-3 along one axis while the other coordinate
// alternates between 0 and 1e-3.
UNIT_TEST(Traffic_Serialization_Zigzag500m)
{
  size_t const kNumPoints = 5;
  double const kStep = 1e-3;

  vector<TrafficGPSEncoder::DataPoint> path;
  for (size_t i = 0; i != kNumPoints; ++i)
  {
    bool const isEven = (i % 2 == 0);
    double const y = isEven ? 0.0 : kStep;
    double const x = i * kStep;
    path.push_back(TrafficGPSEncoder::DataPoint(0, ms::LatLon(y, x), 3));
  }
  Test(path);
}
|
||||
|
||||
// Ten points advancing by 1e-2 along one axis while the other coordinate
// alternates between 0 and 1e-2.
UNIT_TEST(Traffic_Serialization_Zigzag10Km)
{
  size_t const kNumPoints = 10;
  double const kStep = 1e-2;

  vector<TrafficGPSEncoder::DataPoint> path;
  for (size_t i = 0; i != kNumPoints; ++i)
  {
    bool const isEven = (i % 2 == 0);
    double const y = isEven ? 0.0 : kStep;
    double const x = i * kStep;
    path.push_back(TrafficGPSEncoder::DataPoint(0, ms::LatLon(y, x), 0));
  }
  Test(path);
}
|
||||
|
||||
// A thousand points advancing by 1e-1 along one axis while the other
// coordinate alternates between 0 and 1e-1.
UNIT_TEST(Traffic_Serialization_Zigzag100Km)
{
  size_t const kNumPoints = 1000;
  double const kStep = 1e-1;

  vector<TrafficGPSEncoder::DataPoint> path;
  for (size_t i = 0; i != kNumPoints; ++i)
  {
    bool const isEven = (i % 2 == 0);
    double const y = isEven ? 0.0 : kStep;
    double const x = i * kStep;
    path.push_back(TrafficGPSEncoder::DataPoint(0, ms::LatLon(y, x), 0));
  }
  Test(path);
}
|
||||
|
||||
// Round-trips 100 points evenly spaced on a circle of radius 0.25 (in LatLon
// coordinate units) centred at the origin.
UNIT_TEST(Traffic_Serialization_Circle20KmRadius)
{
  size_t const n = 100;
  double const radius = 0.25;

  vector<TrafficGPSEncoder::DataPoint> path;
  for (size_t i = 0; i < n; ++i)
  {
    double const alpha = 2 * math::pi * i / n;
    // std-qualified so the calls do not depend on the C overloads being
    // visible in the global namespace.
    double const x = radius * std::cos(alpha);
    double const y = radius * std::sin(alpha);
    path.emplace_back(TrafficGPSEncoder::DataPoint(0, ms::LatLon(y, x), 0));
  }
  Test(path);
}
|
||||
|
||||
// Round-trips the two opposite corners of the coordinate range:
// (-90, -180) and (90, 180).
UNIT_TEST(Traffic_Serialization_ExtremeLatLon)
{
  using DataPoint = TrafficGPSEncoder::DataPoint;

  vector<DataPoint> path;
  path.push_back(DataPoint(0, ms::LatLon(-90, -180), 0));
  path.push_back(DataPoint(0, ms::LatLon(90, 180), 0));
  Test(path);
}
|
||||
} // namespace traffic_test
|
||||
215
libs/coding/coding_tests/url_tests.cpp
Normal file
215
libs/coding/coding_tests/url_tests.cpp
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/url.hpp"
|
||||
|
||||
#include "base/math.hpp"
|
||||
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace url_tests
|
||||
{
|
||||
using namespace std;
|
||||
using namespace url;
|
||||
|
||||
class TestUrl
|
||||
{
|
||||
public:
|
||||
explicit TestUrl(string && url) : m_url(std::move(url)) {}
|
||||
|
||||
TestUrl & Scheme(string && scheme)
|
||||
{
|
||||
m_scheme = std::move(scheme);
|
||||
return *this;
|
||||
}
|
||||
TestUrl & Host(string && host)
|
||||
{
|
||||
m_host = std::move(host);
|
||||
return *this;
|
||||
}
|
||||
TestUrl & Path(string && path)
|
||||
{
|
||||
m_path = std::move(path);
|
||||
return *this;
|
||||
}
|
||||
TestUrl & KV(string && key, string && value)
|
||||
{
|
||||
m_keyValuePairs.emplace(std::move(key), std::move(value));
|
||||
return *this;
|
||||
}
|
||||
|
||||
~TestUrl()
|
||||
{
|
||||
Url url(m_url);
|
||||
TEST_EQUAL(url.GetScheme(), m_scheme, ());
|
||||
TEST_EQUAL(url.GetHost(), m_host, ());
|
||||
TEST_EQUAL(url.GetPath(), m_path, ());
|
||||
|
||||
TEST(!m_scheme.empty() || !url.IsValid(), ("Scheme is empty if and only if url is invalid!"));
|
||||
|
||||
url.ForEachParam([this](string const & name, string const & value)
|
||||
{
|
||||
TEST(!m_keyValuePairs.empty(), ("Failed for url = ", m_url));
|
||||
TEST_EQUAL(m_keyValuePairs.front().first, name, ());
|
||||
TEST_EQUAL(m_keyValuePairs.front().second, value, ());
|
||||
m_keyValuePairs.pop();
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
string m_url, m_scheme, m_host, m_path;
|
||||
queue<pair<string, string>> m_keyValuePairs;
|
||||
};
|
||||
|
||||
// Pairs of (plain text, percent-encoded text) shared by the encode and decode
// tests below.

// A URL-like string with reserved characters.
char const * orig1 = "http://google.com/main_index.php";
char const * enc1 = "http%3A%2F%2Fgoogle.com%2Fmain_index.php";

// Single spaces.
char const * orig2 = "Some File Name.ext";
char const * enc2 = "Some%20File%20Name.ext";

// Consecutive spaces and punctuation.
char const * orig3 = "Wow, two spaces?!";
char const * enc3 = "Wow%2C%20%20two%20spaces%3F%21";

// Assorted special characters; note that '~' stays unencoded.
char const * orig4 = "#$%^&@~[]{}()|*+`\"\'";
char const * enc4 = "%23%24%25%5E%26%40~%5B%5D%7B%7D%28%29%7C%2A%2B%60%22%27";
|
||||
|
||||
// Join() must produce exactly one '/' between non-empty parts regardless of
// leading or trailing slashes on the inputs.
UNIT_TEST(Url_Join)
{
  // Empty parts.
  TEST_EQUAL("", Join("", ""), ());
  TEST_EQUAL("omim/", Join("", "omim/"), ());
  TEST_EQUAL("omim/", Join("omim/", ""), ());

  // Two parts, with and without a trailing slash on the first one.
  TEST_EQUAL("omim/strings", Join("omim", "strings"), ());
  TEST_EQUAL("omim/strings", Join("omim/", "strings"), ());

  // More than two parts.
  TEST_EQUAL("../../omim/strings", Join("..", "..", "omim", "strings"), ());
  TEST_EQUAL("../../omim/strings", Join("../", "..", "omim/", "strings"), ());

  // Slashes on both sides of a boundary collapse into one.
  TEST_EQUAL("omim/strings", Join("omim/", "/strings"), ());
  TEST_EQUAL("../../omim/strings", Join("../", "/../", "/omim/", "/strings"), ());

  // An empty part in the middle adds nothing.
  TEST_EQUAL("../omim/strings", Join("../", "", "/omim/", "/strings"), ());
}
|
||||
|
||||
// UrlEncode() must map each plain string to its expected percent-encoded form.
UNIT_TEST(Url_Encode)
{
  struct Case
  {
    char const * m_plain;
    char const * m_encoded;
  };

  Case const cases[] = {
      {"", ""},
      {" ", "%20"},
      {"%% ", "%25%25%20"},
      {"20", "20"},
      {"Guinea-Bissau", "Guinea-Bissau"},
      {orig1, enc1},
      {orig2, enc2},
      {orig3, enc3},
      {orig4, enc4},
  };

  for (auto const & c : cases)
    TEST_EQUAL(UrlEncode(c.m_plain), c.m_encoded, ());
}
|
||||
|
||||
// UrlDecode() must map each encoded string back to its plain form; the last
// case checks that '+' decodes to a space.
UNIT_TEST(Url_Decode)
{
  struct Case
  {
    char const * m_encoded;
    char const * m_plain;
  };

  Case const cases[] = {
      {"", ""},
      {"%20", " "},
      {"%25%25%20", "%% "},
      {"20", "20"},
      {"Guinea-Bissau", "Guinea-Bissau"},
      {enc1, orig1},
      {enc2, orig2},
      {enc3, orig3},
      {enc4, orig4},
      {"123+Main+St,+Seattle,+WA+98101", "123 Main St, Seattle, WA 98101"},
  };

  for (auto const & c : cases)
    TEST_EQUAL(UrlDecode(c.m_encoded), c.m_plain, ());
}
|
||||
|
||||
// Strings without a usable scheme must be reported as invalid URLs.
UNIT_TEST(Url_Invalid)
{
  char const * const kInvalidUrls[] = {"", ":/", "//"};
  for (char const * raw : kInvalidUrls)
    TEST(!Url(raw).IsValid(), ());
}
|
||||
|
||||
// Well-formed URLs: every TestUrl temporary verifies its expectations when it
// is destroyed at the end of its statement.
UNIT_TEST(Url_Valid)
{
  // Query parameters, one of them percent-encoded.
  TestUrl("mapswithme://map?ll=10.3,12.3223&n=Hello%20World")
      .Scheme("mapswithme")
      .Host("map")
      .KV("ll", "10.3,12.3223")
      .KV("n", "Hello World");

  // '&' inside the host part.
  TestUrl("cm:M&M//path?q=q&w=w")
      .Scheme("cm")
      .Host("M&M")
      .Path("path")
      .KV("q", "q")
      .KV("w", "w");

  // Trailing slash yields an empty path.
  TestUrl("http://www.sandwichparlour.com.au/")
      .Scheme("http")
      .Host("www.sandwichparlour.com.au")
      .Path("");

  TestUrl("cm:/&test")
      .Scheme("cm")
      .Host("&test")
      .Path("");
}
|
||||
|
||||
// Key/value pairs placed after '#' are expected to be reported as parameters,
// following any regular query parameters.
UNIT_TEST(Url_Fragment)
{
  // Fragment only.
  TestUrl("https://www.openstreetmap.org/way/179409926#map=19/46.34998/48.03213&layers=N")
      .Scheme("https")
      .Host("www.openstreetmap.org")
      .Path("way/179409926")
      .KV("map", "19/46.34998/48.03213")
      .KV("layers", "N");

  // Query followed by a fragment.
  TestUrl("https://www.openstreetmap.org/search?query=Falafel%20Sahyoun#map=16/33.89041/35.50664")
      .Scheme("https")
      .Host("www.openstreetmap.org")
      .Path("search")
      .KV("query", "Falafel Sahyoun")
      .KV("map", "16/33.89041/35.50664");
}
|
||||
|
||||
// Broad coverage of scheme/host/path/parameter parsing, including degenerate
// inputs. A TestUrl with no expectations set requires the URL to be invalid.
UNIT_TEST(UrlScheme_Comprehensive)
{
  // Degenerate inputs and bare schemes.
  TestUrl("");
  TestUrl("scheme:").Scheme("scheme").Host("").Path("");
  TestUrl("scheme:/").Scheme("scheme").Host("").Path("");
  TestUrl("scheme://").Scheme("scheme").Host("").Path("");
  TestUrl("sometext");
  TestUrl(":noscheme");
  TestUrl("://noscheme?");

  // Host and path without parameters.
  TestUrl("mwm://?").Scheme("mwm").Host("").Path("");
  TestUrl("http://host/path/to/something").Scheme("http").Host("host").Path("path/to/something");
  TestUrl("http://host?").Scheme("http").Host("host").Path("");

  // Empty segments between '&' are skipped; a key may have an empty value.
  TestUrl("maps://host?&&key=&").Scheme("maps").Host("host").KV("key", "");

  TestUrl("mapswithme://map?ll=1.2,3.4&z=15")
      .Scheme("mapswithme")
      .Host("map")
      .Path("")
      .KV("ll", "1.2,3.4")
      .KV("z", "15");

  // Keys without '=' get an empty value.
  TestUrl("nopathnovalues://?key1&key2=val2")
      .Scheme("nopathnovalues")
      .Host("")
      .Path("")
      .KV("key1", "")
      .KV("key2", "val2");
  TestUrl("s://?key1&key2")
      .Scheme("s")
      .Host("")
      .Path("")
      .KV("key1", "")
      .KV("key2", "");
  TestUrl("g://h/p?key1=val1&key2=")
      .Scheme("g")
      .Host("h")
      .Path("p")
      .KV("key1", "val1")
      .KV("key2", "");

  // An empty key is allowed.
  TestUrl("g://h?=val1&key2=")
      .Scheme("g")
      .Host("h")
      .Path("")
      .KV("", "val1")
      .KV("key2", "");
  TestUrl("g://?k&key2")
      .Scheme("g")
      .Host("")
      .Path("")
      .KV("k", "")
      .KV("key2", "");

  // Percent-encoded '&' and '=' belong to the key instead of splitting it.
  TestUrl("m:?%26Amp%26%3D%26Amp%26&name=%31%20%30")
      .Scheme("m")
      .Host("")
      .Path("")
      .KV("&Amp&=&Amp&", "")
      .KV("name", "1 0");

  // Repeated keys are reported individually, in order of appearance.
  TestUrl("s://?key1=value1&key1=value2&key1=value3&key2&key2&key3=value1&key3&key3=value2")
      .Scheme("s")
      .Host("")
      .Path("")
      .KV("key1", "value1")
      .KV("key1", "value2")
      .KV("key1", "value3")
      .KV("key2", "")
      .KV("key2", "")
      .KV("key3", "value1")
      .KV("key3", "")
      .KV("key3", "value2");
}
|
||||
|
||||
// Exercises the Url accessors on a real-world link with one encoded parameter.
UNIT_TEST(UrlApi_Smoke)
{
  url::Url parsed("https://2gis.ru/moscow/firm/4504127908589159?m=37.618632%2C55.760069%2F15.232");

  // Structural parts.
  TEST_EQUAL(parsed.GetScheme(), "https", ());
  TEST_EQUAL(parsed.GetHost(), "2gis.ru", ());
  TEST_EQUAL(parsed.GetPath(), "moscow/firm/4504127908589159", ());
  TEST_EQUAL(parsed.GetHostAndPath(), "2gis.ru/moscow/firm/4504127908589159", ());

  // Parameter lookups must yield a truthy result.
  TEST(parsed.GetLastParam(), ());
  TEST(parsed.GetParamValue("m"), ());
}
|
||||
|
||||
} // namespace url_tests
|
||||
76
libs/coding/coding_tests/value_opt_string_test.cpp
Normal file
76
libs/coding/coding_tests/value_opt_string_test.cpp
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/value_opt_string.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <class T>
|
||||
void TestStringCodingT(T const * arr, size_t count, size_t maxSize)
|
||||
{
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
auto const ethalon = strings::to_string(arr[i]);
|
||||
|
||||
StringNumericOptimal s;
|
||||
s.Set(ethalon);
|
||||
|
||||
std::vector<char> buffer;
|
||||
MemWriter<std::vector<char>> w(buffer);
|
||||
|
||||
s.Write(w);
|
||||
|
||||
size_t const sz = buffer.size();
|
||||
TEST_GREATER(sz, 0, ());
|
||||
TEST_LESS_OR_EQUAL(sz, maxSize, ());
|
||||
|
||||
MemReader r(&buffer[0], sz);
|
||||
ReaderSource<MemReader> src(r);
|
||||
s.Read(src);
|
||||
|
||||
TEST_EQUAL(ethalon, s.Get(), ());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// "0" must fit into a single byte, while "01" (leading zero) is allowed up to
// three bytes.
UNIT_TEST(StringNumericOptimal_Zero)
{
  int const zero = 0;
  TestStringCodingT(&zero, 1 /* count */, 1 /* maxSize */);  // should be coded as VarUint

  std::string const leadingZero = "01";
  TestStringCodingT(&leadingZero, 1 /* count */, 3 /* maxSize */);  // should be coded as String
}
|
||||
|
||||
// Non-negative ints up to INT32_MAX must each encode into at most 5 bytes.
UNIT_TEST(StringNumericOptimal_IntCoding1)
{
  int const values[] = {0, 1, 2, 666, 0x0FFFFFFF, 0x7FFFFFFF - 1, 0x7FFFFFFF};
  size_t const kMaxEncodedSize = 5;
  TestStringCodingT(values, ARRAY_SIZE(values), kMaxEncodedSize);  // should be coded as VarUint
}
|
||||
|
||||
// Negative ints must each encode into at most 12 bytes.
UNIT_TEST(StringNumericOptimal_IntCoding2)
{
  int const values[] = {-1, -2, -666666, static_cast<int>(0xFFFFFFFE), static_cast<int>(0xFFFFFFFF)};
  size_t const kMaxEncodedSize = 12;
  TestStringCodingT(values, ARRAY_SIZE(values), kMaxEncodedSize);  // should be coded as String
}
|
||||
|
||||
// Non-decimal strings (including number look-alikes such as hex or suffixed
// literals) must each encode into at most 12 bytes.
UNIT_TEST(StringNumericOptimal_StringCoding)
{
  char const * const samples[] = {"xxx", "yyy", "a", "0xFFFFFF", "123456UL"};
  size_t const kMaxEncodedSize = 12;
  TestStringCodingT(samples, ARRAY_SIZE(samples), kMaxEncodedSize);  // should be coded as String
}
|
||||
|
||||
// A 10000-character string must encode into at most 10006 bytes.
UNIT_TEST(StringNumericOptimal_LargeStringCoding)
{
  // Built with the (count, char) constructor, so no <iterator> helpers or
  // argument-dependent lookup are needed.
  std::string const s(10000, 'x');

  TestStringCodingT(&s, 1, 10006);
}
|
||||
69
libs/coding/coding_tests/var_record_reader_test.cpp
Normal file
69
libs/coding/coding_tests/var_record_reader_test.cpp
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/var_record_reader.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
struct SaveForEachParams
|
||||
{
|
||||
explicit SaveForEachParams(vector<pair<uint64_t, string>> & data) : m_data(data) {}
|
||||
|
||||
void operator()(uint64_t pos, vector<uint8_t> && data) const
|
||||
{
|
||||
m_data.emplace_back(pos, string(data.begin(), data.end()));
|
||||
}
|
||||
|
||||
vector<pair<uint64_t, string>> & m_data;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Writes three length-prefixed records ("abc", a string longer than 128 bytes
// and "defg"), then reads them back by offset and through ForEachRecord.
UNIT_TEST(VarRecordReader_Simple)
{
  char const longString[] =
      "0123456789012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789012345";
  size_t const longStringSize = sizeof(longString) - 1;
  TEST_GREATER(longStringSize, 128, ());

  // Byte offset of each piece is given on the right.
  vector<uint8_t> data;
  {
    MemWriter<vector<uint8_t>> writer(data);
    WriteVarUint(writer, 3U);                  // offset 0
    writer.Write("abc", 3);                    // offset 1
    WriteVarUint(writer, longStringSize);      // offset 4
    writer.Write(longString, longStringSize);  // offset 6
    WriteVarUint(writer, 4U);                  // offset 6 + longStringSize
    writer.Write("defg", 4);                   // offset 7 + longStringSize
    // Total size: 11 + longStringSize.
  }

  MemReader reader(data.data(), data.size());
  VarRecordReader<MemReader> recordReader(reader);

  auto const readAsString = [&recordReader](uint64_t pos)
  {
    auto const record = recordReader.ReadRecord(pos);
    return string(record.begin(), record.end());
  };

  // Random access, deliberately out of order.
  TEST_EQUAL(readAsString(0), "abc", ());
  TEST_EQUAL(readAsString(6 + longStringSize), "defg", ());
  TEST_EQUAL(readAsString(4), longString, ());

  // Sequential traversal must report every record with its offset.
  vector<pair<uint64_t, string>> visited;
  recordReader.ForEachRecord(SaveForEachParams(visited));
  vector<pair<uint64_t, string>> const expected = {{0, "abc"}, {4, longString}, {6 + longStringSize, "defg"}};
  TEST_EQUAL(visited, expected, ());
}
|
||||
118
libs/coding/coding_tests/var_serial_vector_test.cpp
Normal file
118
libs/coding/coding_tests/var_serial_vector_test.cpp
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/byte_stream.hpp"
|
||||
#include "coding/hex.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/var_serial_vector.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Hex dump of the serialized vector {"a", "bcd", "ef"} in the layout written
// by WriteVarSerialVector below: element count, one cumulative end offset per
// element, then the concatenated payload.
char const kHexSerial[] =
    "03000000"       // number of elements: 3
    "01000000"       // end offset of "a"
    "04000000"       // end offset of "bcd"
    "06000000"       // end offset of "ef"
    "616263646566";  // payload "abcdef"
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename ItT, typename TDstStream>
|
||||
void WriteVarSerialVector(ItT begin, ItT end, TDstStream & dst)
|
||||
{
|
||||
vector<uint32_t> offsets;
|
||||
uint32_t offset = 0;
|
||||
for (ItT it = begin; it != end; ++it)
|
||||
{
|
||||
offset += it->size() * sizeof((*it)[0]);
|
||||
offsets.push_back(offset);
|
||||
}
|
||||
|
||||
WriteToSink(dst, static_cast<uint32_t>(end - begin));
|
||||
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
WriteToSink(dst, offsets[i]);
|
||||
|
||||
for (ItT it = begin; it != end; ++it)
|
||||
{
|
||||
typename ItT::value_type const & v = *it;
|
||||
if (!v.empty())
|
||||
dst.Write(&v[0], v.size() * sizeof(v[0]));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// The reference serializer must reproduce kHexSerial for {"a", "bcd", "ef"}.
UNIT_TEST(WriteSerial)
{
  vector<string> const elements = {"a", "bcd", "ef"};

  string serialized;
  PushBackByteSink<string> sink(serialized);
  WriteVarSerialVector(elements.begin(), elements.end(), sink);

  TEST_EQUAL(ToHex(serialized), kHexSerial, ());
}
|
||||
|
||||
// VarSerialVectorWriter must produce kHexSerial when the three records are
// written one by one, each closed with FinishRecord().
UNIT_TEST(WriteSerialWithWriter)
{
  using Writer = MemWriter<string>;

  string serialized;
  Writer writer(serialized);
  VarSerialVectorWriter<Writer> records(writer, 3 /* number of records */);

  writer.Write("a", 1);
  records.FinishRecord();

  writer.Write("bcd", 3);
  records.FinishRecord();

  writer.Write("ef", 2);
  records.FinishRecord();

  TEST_EQUAL(ToHex(serialized), kHexSerial, ());
}
|
||||
|
||||
// VarSerialVectorReader must return the three original elements when fed the
// bytes described by kHexSerial.
UNIT_TEST(ReadSerial)
{
  string serialized(FromHex(string(kHexSerial)));
  MemReader memReader(serialized.data(), serialized.size());
  ReaderSource<MemReader> memSource(memReader);
  VarSerialVectorReader<MemReader> reader(memSource);

  char const * const kExpected[] = {"a", "bcd", "ef"};
  for (uint32_t i = 0; i < 3; ++i)
    TEST_EQUAL(reader.Read(i), kExpected[i], ());
}
|
||||
|
||||
// Round-trips 1024 pseudo-random lowercase words (1 to 20 letters each, fixed
// seed) through the reference serializer and VarSerialVectorReader.
UNIT_TEST(EncodeDecode)
{
  size_t const kNumWords = 1024;

  mt19937 rng(0);
  vector<string> words;
  for (size_t n = 0; n < kNumWords; ++n)
  {
    // The length is drawn first, then one draw per letter.
    string word(1 + (rng() % 20), 0);
    for (char & letter : word)
      letter = static_cast<char>(rng() % 26) + 'a';
    words.push_back(word);
  }

  string serialized;
  PushBackByteSink<string> sink(serialized);
  WriteVarSerialVector(words.begin(), words.end(), sink);

  MemReader memReader(serialized.c_str(), serialized.size());
  ReaderSource<MemReader> memSource(memReader);
  VarSerialVectorReader<MemReader> reader(memSource);

  for (size_t pos = 0; pos < words.size(); ++pos)
    TEST_EQUAL(reader.Read(static_cast<uint32_t>(pos)), words[pos], ());
}
|
||||
214
libs/coding/coding_tests/varint_test.cpp
Normal file
214
libs/coding/coding_tests/varint_test.cpp
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/byte_stream.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
void TestVarUint(T const x)
|
||||
{
|
||||
vector<unsigned char> data;
|
||||
PushBackByteSink<vector<uint8_t>> dst(data);
|
||||
WriteVarUint(dst, x);
|
||||
|
||||
ArrayByteSource src(&data[0]);
|
||||
TEST_EQUAL(ReadVarUint<T>(src), x, ());
|
||||
|
||||
size_t const bytesRead = src.PtrUint8() - data.data();
|
||||
TEST_EQUAL(bytesRead, data.size(), (x));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void TestVarInt(T const x)
|
||||
{
|
||||
vector<uint8_t> data;
|
||||
PushBackByteSink<vector<uint8_t>> dst(data);
|
||||
WriteVarInt(dst, x);
|
||||
|
||||
ArrayByteSource src(&data[0]);
|
||||
TEST_EQUAL(ReadVarInt<T>(src), x, ());
|
||||
|
||||
size_t const bytesRead = src.PtrUint8() - data.data();
|
||||
TEST_EQUAL(bytesRead, data.size(), (x));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Zero must round-trip for the 32- and 64-bit unsigned types.
UNIT_TEST(VarUint0)
{
  // The 8- and 16-bit variants are disabled:
  // TestVarUint(static_cast<uint8_t>(0));
  // TestVarUint(static_cast<uint16_t>(0));
  uint32_t const zero32 = 0;
  uint64_t const zero64 = 0;
  TestVarUint(zero32);
  TestVarUint(zero64);
}
|
||||
|
||||
// The all-ones bit pattern (-1 converted to unsigned) must round-trip for the
// 32- and 64-bit unsigned types.
UNIT_TEST(VarUintMinus1)
{
  // The 8- and 16-bit variants are disabled:
  // TestVarUint(static_cast<uint8_t>(-1));
  // TestVarUint(static_cast<uint16_t>(-1));
  uint32_t const allOnes32 = static_cast<uint32_t>(-1);
  uint64_t const allOnes64 = static_cast<uint64_t>(-1);
  TestVarUint(allOnes32);
  TestVarUint(allOnes64);
}
|
||||
|
||||
// Round-trips 32-bit unsigned values in a window around every power of two
// (2^bit - 3 .. 2^bit + 147), clipped to the uint32_t range.
UNIT_TEST(VarUint32)
{
  for (int bit = 0; bit <= 32; ++bit)
  {
    uint64_t const power = 1ULL << bit;
    uint64_t const last = power + 147;
    // For bit 0 and 1 the start value wraps around in uint64_t, so the range
    // check fails immediately and the window is skipped.
    for (uint64_t value = power - 3; value <= uint32_t(-1) && value <= last; ++value)
      TestVarUint(static_cast<uint32_t>(value));
  }
}
|
||||
|
||||
// Round-trips 32-bit signed values: a window around every power of two (both the
// value and its negation), then every integer in [-10000, 10000], and finally the
// unsigned encoding of [0, 10000].
UNIT_TEST(VarInt32)
{
  for (int bit = 0; bit <= 32; ++bit)
  {
    uint64_t const power = 1ULL << bit;
    uint64_t const last = power + 147;
    // For bit 0 and 1 the start value wraps around in uint64_t, so the range
    // check fails immediately and the window is skipped.
    for (uint64_t value = power - 3; value <= uint32_t(-1) && value <= last; ++value)
    {
      TestVarInt(static_cast<int32_t>(value));
      TestVarInt(static_cast<int32_t>(-value));
    }
  }

  int const bound = 10000;

  for (int value = -bound; value <= bound; ++value)
    TestVarInt(static_cast<int32_t>(value));

  for (int value = 0; value <= bound; ++value)
    TestVarUint(static_cast<uint32_t>(value));
}
|
||||
|
||||
// Checks the encoded length of a few signed values: +/-60 take one byte,
// +/-120 take two.
UNIT_TEST(VarIntSize)
{
  struct Case
  {
    int m_value;
    size_t m_encodedSize;
  };
  Case const kCases[] = {{60, 1}, {-60, 1}, {120, 2}, {-120, 2}};

  vector<unsigned char> data;
  PushBackByteSink<vector<unsigned char>> dst(data);
  for (Case const & c : kCases)
  {
    // The sink appends to |data|, so start every case from an empty buffer.
    data.clear();
    WriteVarInt(dst, c.m_value);
    TEST_EQUAL(data.size(), c.m_encodedSize, (c.m_value));
  }
}
|
||||
|
||||
// Round-trips the extreme values of the 32- and 64-bit types.
UNIT_TEST(VarIntMax)
{
  // Largest unsigned values.
  TestVarUint(static_cast<uint32_t>(-1));
  TestVarUint(static_cast<uint64_t>(-1));

  // Signed 32-bit extremes.
  TestVarInt(static_cast<int32_t>(2147483647));
  TestVarInt(static_cast<int32_t>(-2147483648LL));

  // Signed 64-bit maximum; the minimum is disabled.
  TestVarInt(static_cast<int64_t>(9223372036854775807LL));
  // TestVarInt(int64_t(-9223372036854775808LL));
}
|
||||
|
||||
// Reading an empty input must produce no values and return the (null) start
// pointer, for both ReadVarInt64Array overloads.
UNIT_TEST(ReadVarInt64Array_EmptyArray)
{
  vector<int64_t> result;

  // Overload that reads until an end pointer.
  void const * pEnd =
      ReadVarInt64Array(nullptr, static_cast<void *>(nullptr), base::MakeBackInsertFunctor(result));
  TEST_EQUAL(result, vector<int64_t>(), ("UntilBufferEnd"));
  TEST_EQUAL(reinterpret_cast<uintptr_t>(pEnd), 0, ("UntilBufferEnd"));

  // Overload that reads a given number of values.
  pEnd = ReadVarInt64Array(nullptr, size_t{0}, base::MakeBackInsertFunctor(result));
  TEST_EQUAL(result, vector<int64_t>(), ("GivenSize"));
  TEST_EQUAL(reinterpret_cast<uintptr_t>(pEnd), 0, ("GivenSize"));
}
|
||||
|
||||
// Encodes every non-empty subset of a pool of boundary values with WriteVarInt and
// checks that both ReadVarInt64Array overloads decode it back and stop at the end
// of the encoded buffer.
UNIT_TEST(ReadVarInt64Array)
{
  vector<int64_t> values;

  // Build the pool: each base value and its negation, sorted and de-duplicated.
  {
    int64_t const baseValues[] = {0,
                                  127,
                                  128,
                                  (2 << 28) - 1,
                                  (2 << 28),
                                  (2LL << 31),
                                  (2LL << 31) - 1,
                                  0xFFFFFFFF - 1,
                                  0xFFFFFFFF,
                                  0xFFFFFFFFFFULL};
    for (int64_t const base : baseValues)
    {
      values.push_back(base);
      values.push_back(-base);
    }
    sort(values.begin(), values.end());
    values.erase(unique(values.begin(), values.end()), values.end());
  }

  // Test all subsets: bit j of |mask| selects values[j].
  size_t const subsetsEnd = 1U << values.size();
  for (size_t mask = 1; mask < subsetsEnd; ++mask)
  {
    vector<int64_t> testValues;
    for (size_t j = 0; j < values.size(); ++j)
    {
      if ((mask >> j) & 1)
        testValues.push_back(values[j]);
    }

    vector<unsigned char> data;
    {
      PushBackByteSink<vector<unsigned char>> dst(data);
      for (int64_t const value : testValues)
        WriteVarInt(dst, value);
    }

    ASSERT_GREATER(data.size(), 0, ());

    // Overload that reads until the end pointer.
    {
      // The begin/end pointers are factored out into variables on purpose: this was
      // done to expose a compiler error in clang 3.5 loop optimization.
      /// @todo Need to check with the new XCode (and clang) update.

      void const * pDataStart = data.data();
      void const * pDataEnd = data.data() + data.size();

      vector<int64_t> result;
      void const * pEnd = ReadVarInt64Array(pDataStart, pDataEnd, base::MakeBackInsertFunctor(result));

      TEST_EQUAL(pEnd, pDataEnd, ("UntilBufferEnd", data.size()));
      TEST_EQUAL(result, testValues, ("UntilBufferEnd", data.size()));
    }

    // Overload that reads a given number of values.
    {
      vector<int64_t> result;
      void const * pEnd = ReadVarInt64Array(data.data(), testValues.size(), base::MakeBackInsertFunctor(result));

      TEST_EQUAL(pEnd, data.data() + data.size(), ("GivenSize", data.size()));
      TEST_EQUAL(result, testValues, ("GivenSize", data.size()));
    }
  }
}
|
||||
|
||||
// Round-trips a few short sorted arrays through WriteVarUInt32SortedShortArray /
// ReadVarUInt32SortedShortArray, including values up to 2^30 - 1.
UNIT_TEST(VarInt_ShortSortedArray)
{
  uint32_t constexpr maxVal = (uint32_t(1) << 30) - 1;

  std::vector<uint32_t> samples[] = {
      {0},
      {10, 10000},
      {maxVal - 2, maxVal - 1, maxVal},
  };

  for (auto const & expected : samples)
  {
    // Serialize into an in-memory buffer.
    std::vector<uint8_t> bytes;
    PushBackByteSink writer(bytes);
    WriteVarUInt32SortedShortArray(expected, writer);

    // Deserialize from the same buffer.
    MemReader memReader(bytes.data(), bytes.size());
    ReaderSource source(memReader);
    std::vector<uint32_t> decoded;
    ReadVarUInt32SortedShortArray(source, decoded);

    TEST_EQUAL(expected, decoded, ());
  }
}
|
||||
259
libs/coding/coding_tests/writer_test.cpp
Normal file
259
libs/coding/coding_tests/writer_test.cpp
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/buffered_file_writer.hpp"
|
||||
#include "coding/file_reader.hpp"
|
||||
#include "coding/file_writer.hpp"
|
||||
#include "coding/internal/file_data.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
static char const kTestWriteStr[] = "01234567";
|
||||
|
||||
template <class WriterT>
|
||||
void TestWrite(WriterT & writer)
|
||||
{
|
||||
writer.Write("01", 2); // "01"
|
||||
TEST_EQUAL(writer.Pos(), 2, ());
|
||||
writer.Write("x", 1); // "01x"
|
||||
TEST_EQUAL(writer.Pos(), 3, ());
|
||||
writer.Write("3", 1); // "01x3"
|
||||
TEST_EQUAL(writer.Pos(), 4, ());
|
||||
writer.Seek(2);
|
||||
TEST_EQUAL(writer.Pos(), 2, ());
|
||||
writer.Write("2", 1); // "0123"
|
||||
TEST_EQUAL(writer.Pos(), 3, ());
|
||||
writer.Seek(7);
|
||||
TEST_EQUAL(writer.Pos(), 7, ());
|
||||
writer.Write("7", 1); // "0123???7"
|
||||
TEST_EQUAL(writer.Pos(), 8, ());
|
||||
writer.Seek(4);
|
||||
TEST_EQUAL(writer.Pos(), 4, ());
|
||||
writer.Write("45", 2); // "012345?7"
|
||||
writer.Write("6", 1); // "01234567"
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Runs the common write/seek scenario against an in-memory writer.
UNIT_TEST(MemWriter_Smoke)
{
  vector<char> buffer;
  MemWriter<vector<char>> writer(buffer);
  TestWrite(writer);

  string const written(buffer.begin(), buffer.end());
  TEST_EQUAL(written, kTestWriteStr, ());
}
|
||||
|
||||
// Runs the common write/seek scenario against a file, then reads the file back
// and compares its content with the expected string.
UNIT_TEST(FileWriter_Smoke)
{
  char const fileName[] = "file_writer_smoke_test.tmp";

  // The writer is scoped so that it is destroyed before the file is read.
  {
    FileWriter writer(fileName);
    TestWrite(writer);
  }

  vector<char> content;
  {
    FileReader reader(fileName);
    auto const size = reader.Size();
    content.resize(size);
    reader.Read(0, content.data(), size);
  }
  TEST_EQUAL(string(content.begin(), content.end()), kTestWriteStr, ());

  FileWriter::DeleteFileX(fileName);
}
|
||||
|
||||
// Runs the common write/seek scenario through a SubWriter layered on an in-memory
// writer that already holds a prefix; a suffix is appended afterwards.
UNIT_TEST(SubWriter_MemWriter_Smoke)
{
  using Base = MemWriter<vector<char>>;

  vector<char> buffer;
  Base writer(buffer);

  writer.Write("aa", 2);
  {
    SubWriter<Base> subWriter(writer);
    TestWrite(subWriter);
  }
  writer.Write("bb", 2);

  string const written(buffer.begin(), buffer.end());
  TEST_EQUAL(written, "aa" + string(kTestWriteStr) + "bb", ());
}
|
||||
|
||||
// Same as the in-memory SubWriter test, but layered on a FileWriter: the file must
// end up as prefix + test string + suffix.
UNIT_TEST(SubWriter_FileWriter_Smoke)
{
  char const fileName[] = "sub_file_writer_smoke_test.tmp";

  // Writers are scoped so that they are destroyed before the file is read.
  {
    FileWriter writer(fileName);
    writer.Write("aa", 2);
    {
      SubWriter<FileWriter> subWriter(writer);
      TestWrite(subWriter);
    }
    writer.Write("bb", 2);
  }

  vector<char> content;
  {
    FileReader reader(fileName);
    auto const size = reader.Size();
    content.resize(size);
    reader.Read(0, content.data(), size);
  }
  TEST_EQUAL(string(content.begin(), content.end()), "aa" + string(kTestWriteStr) + "bb", ());

  FileWriter::DeleteFileX(fileName);
}
|
||||
|
||||
// After FileWriter::DeleteFileX the file must be gone: opening it with FileReader
// has to throw FileReader::OpenException.
UNIT_TEST(FileWriter_DeleteFile)
{
  char const fileName[] = "delete_file_test";

  // Create a 3-byte file and confirm it is readable.
  {
    FileWriter writer(fileName);
    writer.Write("123", 3);
  }
  {
    FileReader reader(fileName);
    TEST_EQUAL(reader.Size(), 3, ());
  }

  FileWriter::DeleteFileX(fileName);

  bool openFailed = false;
  try
  {
    FileReader reader(fileName);
  }
  catch (FileReader::OpenException &)
  {
    openFailed = true;
  }
  TEST(openFailed, ("Exception should be thrown!"));
}
|
||||
|
||||
// Walks one file through the FileWriter open modes used here: default creation,
// OP_WRITE_EXISTING (keeps the content, writes from the start) and OP_APPEND,
// checking the file size and content after each step.
UNIT_TEST(FileWriter_AppendAndOpenExisting)
{
  char const fileName[] = "append_openexisting_file_test";

  // Opens the file and checks only its size.
  auto const checkSize = [&fileName](uint64_t expectedSize)
  {
    FileReader reader(fileName);
    TEST_EQUAL(reader.Size(), expectedSize, ());
  };
  // Opens the file, checks its size and then its full content.
  auto const checkContent = [&fileName](string const & expected)
  {
    FileReader reader(fileName);
    TEST_EQUAL(reader.Size(), expected.size(), ());
    string actual(static_cast<uint32_t>(reader.Size()), 0);
    reader.Read(0, &actual[0], actual.size());
    TEST_EQUAL(actual, expected, ());
  };

  // Create an empty file.
  {
    FileWriter writer(fileName);
  }

  // Opening the empty file as existing and writing fills it.
  {
    FileWriter writer(fileName, FileWriter::OP_WRITE_EXISTING);
    TEST_EQUAL(writer.Size(), 0, ());
    writer.Write("abcd", 4);
  }
  checkContent("abcd");

  // The default mode replaces the previous content.
  {
    FileWriter writer(fileName);
    writer.Write("123", 3);
  }
  checkSize(3);

  // Append adds to the end.
  {
    FileWriter writer(fileName, FileWriter::OP_APPEND);
    writer.Write("4", 1);
  }
  checkContent("1234");

  // Writing to an existing file overwrites from the beginning and keeps the tail.
  {
    FileWriter writer(fileName, FileWriter::OP_WRITE_EXISTING);
    TEST_EQUAL(writer.Size(), 4, ());
    writer.Write("56", 2);
  }
  checkContent("5634");

  FileWriter::DeleteFileX(fileName);
}
|
||||
|
||||
size_t const CHUNK_SIZE = 1024;
|
||||
size_t const CHUNKS_COUNT = 21;
|
||||
string const TEST_STRING = "Some Test String";
|
||||
|
||||
// Seeks past all chunk slots and writes TEST_STRING there, i.e. at offset
// CHUNKS_COUNT * CHUNK_SIZE.
void WriteTestData1(Writer & w)
{
  auto const tailOffset = CHUNKS_COUNT * CHUNK_SIZE;
  w.Seek(tailOffset);
  w.Write(TEST_STRING.data(), TEST_STRING.size());
}
|
||||
|
||||
// Fills every chunk slot with CHUNK_SIZE bytes equal to the chunk index. Odd chunks
// are written first and even chunks second, so the writer has to seek to
// non-sequential offsets.
void WriteTestData2(Writer & w)
{
  char chunk[CHUNK_SIZE];

  // Fills the buffer with |index| (narrowed to char) and writes it at the chunk's offset.
  auto const writeChunk = [&w, &chunk](size_t index)
  {
    for (char & byte : chunk)
      byte = static_cast<char>(index);
    w.Seek(index * CHUNK_SIZE);
    w.Write(chunk, ARRAY_SIZE(chunk));
  };

  for (size_t odd = 1; odd < CHUNKS_COUNT; odd += 2)
    writeChunk(odd);

  for (size_t even = 0; even < CHUNKS_COUNT; even += 2)
    writeChunk(even);
}
|
||||
|
||||
// Reads everything from |r| and verifies the expected layout: CHUNKS_COUNT chunks of
// CHUNK_SIZE bytes, each byte equal to its chunk index, followed by TEST_STRING.
void ReadTestData(Reader & r)
{
  string s;
  r.ReadAsString(s);

  size_t offset = 0;
  for (size_t chunk = 0; chunk < CHUNKS_COUNT; ++chunk)
  {
    char const expected = static_cast<char>(chunk);
    for (size_t j = 0; j < CHUNK_SIZE; ++j, ++offset)
      TEST_EQUAL(s[offset], expected, (chunk, j));
  }

  string const sub = s.substr(CHUNKS_COUNT * CHUNK_SIZE);
  TEST_EQUAL(sub, TEST_STRING, (sub, TEST_STRING));
}
|
||||
template <typename WriterType>
|
||||
void WriteToFileAndTest()
|
||||
{
|
||||
string const TEST_FILE = "FileWriter_Chunks.test";
|
||||
{
|
||||
WriterType fileWriter(TEST_FILE, FileWriter::OP_WRITE_TRUNCATE);
|
||||
WriteTestData1(fileWriter);
|
||||
}
|
||||
{
|
||||
WriterType fileWriter(TEST_FILE, FileWriter::OP_WRITE_EXISTING);
|
||||
WriteTestData2(fileWriter);
|
||||
}
|
||||
{
|
||||
FileReader r(TEST_FILE);
|
||||
ReadTestData(r);
|
||||
}
|
||||
FileWriter::DeleteFileX(TEST_FILE);
|
||||
}
|
||||
|
||||
// Chunked write/read scenario with the plain FileWriter.
UNIT_TEST(FileWriter_Chunks)
{
  WriteToFileAndTest<FileWriter>();
}
|
||||
|
||||
// Chunked write/read scenario with BufferedFileWriter.
UNIT_TEST(BufferedFileWriter_Smoke)
{
  WriteToFileAndTest<BufferedFileWriter>();
}
|
||||
|
||||
// Chunked write/read scenario against an in-memory string: two separate MemWriter
// instances write into the same buffer, then a MemReader validates the result.
UNIT_TEST(MemWriter_Chunks)
{
  using StringWriter = MemWriter<string>;

  string buffer;

  // Pass 1: the tail string.
  {
    StringWriter tailWriter(buffer);
    WriteTestData1(tailWriter);
  }

  // Pass 2: a fresh writer over the same buffer fills the chunks.
  {
    StringWriter chunksWriter(buffer);
    WriteTestData2(chunksWriter);
  }

  {
    MemReader reader(buffer.data(), buffer.size());
    ReadTestData(reader);
  }
}
|
||||
119
libs/coding/coding_tests/xml_parser_tests.cpp
Normal file
119
libs/coding/coding_tests/xml_parser_tests.cpp
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/parse_xml.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace
|
||||
{
|
||||
std::string const smokeXml = R"(
|
||||
<root>
|
||||
</root>
|
||||
)";
|
||||
|
||||
std::string const longXml = R"(
|
||||
<root>
|
||||
<ruler>
|
||||
<portrait>
|
||||
<anchor vertical="bottom" horizontal="left"/>
|
||||
<offset x="10"/>
|
||||
</portrait>
|
||||
</ruler>
|
||||
<compass>
|
||||
<portrait>
|
||||
<anchor vertical="center"/>
|
||||
<relative vertical="top"/>
|
||||
</portrait>
|
||||
<landscape>
|
||||
<relative vertical="top"/>
|
||||
<offset x="34" y="48"/>
|
||||
</landscape>
|
||||
</compass>
|
||||
</root>
|
||||
)";
|
||||
|
||||
class SmokeDispatcher
|
||||
{
|
||||
public:
|
||||
void CharData(std::string const &) {}
|
||||
void AddAttr(char const *, char const *) {}
|
||||
bool Push(std::string_view push)
|
||||
{
|
||||
TEST_EQUAL(push, "root", ());
|
||||
return true;
|
||||
}
|
||||
void Pop(std::string_view pop) { TEST_EQUAL(pop, "root", ()); }
|
||||
};
|
||||
|
||||
class Dispatcher
|
||||
{
|
||||
public:
|
||||
using PairsOfStrings = std::vector<std::pair<std::string, std::string>>;
|
||||
using Strings = std::vector<std::string>;
|
||||
|
||||
void CharData(std::string const & ch) {}
|
||||
|
||||
void AddAttr(std::string key, std::string value) { m_addAttrs.emplace_back(std::move(key), std::move(value)); }
|
||||
|
||||
bool Push(std::string push)
|
||||
{
|
||||
m_pushes.emplace_back(std::move(push));
|
||||
return true;
|
||||
}
|
||||
|
||||
void Pop(std::string pop) { m_pops.emplace_back(std::move(pop)); }
|
||||
|
||||
void TestAddAttrs(PairsOfStrings const & addAttrs) { TestEquality(m_addAttrs, addAttrs); }
|
||||
|
||||
void TestPushes(Strings const & pushes) { TestEquality(m_pushes, pushes); }
|
||||
|
||||
void TestPops(Strings const & pops) { TestEquality(m_pops, pops); }
|
||||
|
||||
private:
|
||||
template <typename F>
|
||||
void TestEquality(F const & f1, F const & f2)
|
||||
{
|
||||
TEST_EQUAL(f1.size(), f2.size(), ());
|
||||
for (size_t i = 0; i < f1.size(); ++i)
|
||||
TEST_EQUAL(f1[i], f2[i], (i));
|
||||
}
|
||||
|
||||
PairsOfStrings m_addAttrs;
|
||||
Strings m_pushes;
|
||||
Strings m_pops;
|
||||
};
|
||||
|
||||
template <typename D>
|
||||
void TestXML(std::string const & xmlStr, D & dispatcher)
|
||||
{
|
||||
std::vector<uint8_t> xml(xmlStr.cbegin(), xmlStr.cend());
|
||||
MemReader reader(xml.data(), xml.size());
|
||||
ReaderSource<MemReader> source(reader);
|
||||
|
||||
ParseXML(source, dispatcher);
|
||||
}
|
||||
|
||||
// A document with one empty <root> element yields one push, one pop and no attributes.
UNIT_TEST(XmlParser_SmokeTest)
{
  Dispatcher::Strings const rootOnly = {"root"};

  Dispatcher d;
  TestXML(smokeXml, d);

  d.TestAddAttrs({});
  d.TestPushes(rootOnly);
  d.TestPops(rootOnly);
}
|
||||
|
||||
// Parses the nested document and checks the attributes (in document order), the
// push order and the pop order.
UNIT_TEST(XmlParser_LongTest)
{
  Dispatcher::PairsOfStrings const expectedAttrs = {
      {"vertical", "bottom"}, {"horizontal", "left"}, {"x", "10"}, {"vertical", "center"},
      {"vertical", "top"},    {"vertical", "top"},    {"x", "34"}, {"y", "48"}};

  Dispatcher::Strings const expectedPushes = {"root",    "ruler",    "portrait", "anchor",    "offset",   "compass",
                                              "portrait", "anchor",   "relative", "landscape", "relative", "offset"};

  Dispatcher::Strings const expectedPops = {"anchor",   "offset",   "portrait", "ruler",     "anchor",  "relative",
                                            "portrait", "relative", "offset",   "landscape", "compass", "root"};

  Dispatcher d;
  TestXML(longXml, d);

  d.TestAddAttrs(expectedAttrs);
  d.TestPushes(expectedPushes);
  d.TestPops(expectedPops);
}
|
||||
} // namespace
|
||||
154
libs/coding/coding_tests/zip_creator_test.cpp
Normal file
154
libs/coding/coding_tests/zip_creator_test.cpp
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/constants.hpp"
|
||||
#include "coding/file_writer.hpp"
|
||||
#include "coding/internal/file_data.hpp"
|
||||
#include "coding/zip_creator.hpp"
|
||||
#include "coding/zip_reader.hpp"
|
||||
|
||||
#include "base/scope_guard.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace
|
||||
{
|
||||
void CreateAndTestZip(std::string const & filePath, std::string const & zipPath)
|
||||
{
|
||||
TEST(CreateZipFromFiles({filePath}, zipPath, CompressionLevel::DefaultCompression), ());
|
||||
|
||||
ZipFileReader::FileList files;
|
||||
ZipFileReader::FilesList(zipPath, files);
|
||||
TEST_EQUAL(files[0].second, FileReader(filePath).Size(), ());
|
||||
|
||||
std::string const unzippedFile = "unzipped.tmp";
|
||||
ZipFileReader::UnzipFile(zipPath, files[0].first, unzippedFile);
|
||||
|
||||
TEST(base::IsEqualFiles(filePath, unzippedFile), ());
|
||||
TEST(base::DeleteFileX(filePath), ());
|
||||
TEST(base::DeleteFileX(zipPath), ());
|
||||
TEST(base::DeleteFileX(unzippedFile), ());
|
||||
}
|
||||
|
||||
void CreateAndTestZip(std::vector<std::string> const & files, std::string const & zipPath, CompressionLevel compression)
|
||||
{
|
||||
TEST(CreateZipFromFiles(files, zipPath, compression), ());
|
||||
|
||||
ZipFileReader::FileList fileList;
|
||||
ZipFileReader::FilesList(zipPath, fileList);
|
||||
std::string const unzippedFile = "unzipped.tmp";
|
||||
for (size_t i = 0; i < files.size(); ++i)
|
||||
{
|
||||
TEST_EQUAL(fileList[i].second, FileReader(files[i]).Size(), ());
|
||||
|
||||
ZipFileReader::UnzipFile(zipPath, fileList[i].first, unzippedFile);
|
||||
|
||||
TEST(base::IsEqualFiles(files[i], unzippedFile), ());
|
||||
TEST(base::DeleteFileX(unzippedFile), ());
|
||||
}
|
||||
TEST(base::DeleteFileX(zipPath), ());
|
||||
}
|
||||
|
||||
void CreateAndTestZipWithFolder(std::vector<std::string> const & files, std::vector<std::string> const & filesInArchive,
|
||||
std::string const & zipPath, CompressionLevel compression)
|
||||
{
|
||||
TEST(CreateZipFromFiles(files, filesInArchive, zipPath, compression), ());
|
||||
|
||||
ZipFileReader::FileList fileList;
|
||||
ZipFileReader::FilesList(zipPath, fileList);
|
||||
std::string const unzippedFile = "unzipped.tmp";
|
||||
for (size_t i = 0; i < files.size(); ++i)
|
||||
{
|
||||
TEST_EQUAL(fileList[i].second, FileReader(files[i]).Size(), ());
|
||||
|
||||
ZipFileReader::UnzipFile(zipPath, fileList[i].first, unzippedFile);
|
||||
|
||||
TEST(base::IsEqualFiles(files[i], unzippedFile), ());
|
||||
TEST(base::DeleteFileX(unzippedFile), ());
|
||||
}
|
||||
TEST(base::DeleteFileX(zipPath), ());
|
||||
}
|
||||
|
||||
std::vector<CompressionLevel> GetCompressionLevels()
|
||||
{
|
||||
return {CompressionLevel::DefaultCompression, CompressionLevel::BestCompression, CompressionLevel::BestSpeed,
|
||||
CompressionLevel::NoCompression};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Zips a file whose size is one byte more than READ_FILE_BUFFER_SIZE.
UNIT_TEST(CreateZip_BigFile)
{
  std::string const name = "testfileforzip.txt";

  // Scoped so that the writer is destroyed before the file is zipped.
  {
    FileWriter writer(name);
    std::string const payload(READ_FILE_BUFFER_SIZE + 1, '1');
    writer.Write(payload.c_str(), payload.size());
  }

  CreateAndTestZip(name, "testzip.zip");
}
|
||||
|
||||
// Zips a small file whose content is its own name.
UNIT_TEST(CreateZip_Smoke)
{
  std::string const name = "testfileforzip.txt";

  // Scoped so that the writer is destroyed before the file is zipped.
  {
    FileWriter writer(name);
    writer.Write(name.c_str(), name.size());
  }

  CreateAndTestZip(name, "testzip.zip");
}
|
||||
|
||||
// Zips three small files (each containing its own name) with every compression level.
UNIT_TEST(CreateZip_MultipleFiles)
{
  std::vector<std::string> const fileData{"testf1", "testfile2", "testfile3_longname.txt.xml.csv"};

  // Deletes the input files when the test scope is left.
  SCOPE_GUARD(deleteFileGuard, [&fileData]()
  {
    for (auto const & file : fileData)
      TEST(base::DeleteFileX(file), ());
  });

  for (auto const & name : fileData)
  {
    FileWriter writer(name);
    writer.Write(name.c_str(), name.size());
  }

  auto const levels = GetCompressionLevels();
  for (auto const level : levels)
    CreateAndTestZip(fileData, "testzip.zip", level);
}
|
||||
|
||||
// Zips three small files under in-archive names, two of which are placed in
// sub-folders, with every compression level.
UNIT_TEST(CreateZip_MultipleFilesWithFolders)
{
  std::vector<std::string> const fileData{"testf1", "testfile2", "testfile3_longname.txt.xml.csv"};
  std::vector<std::string> const fileInArchiveData{"testf1", "f2/testfile2", "f3/testfile3_longname.txt.xml.csv"};

  // Deletes the input files when the test scope is left.
  SCOPE_GUARD(deleteFileGuard, [&fileData]()
  {
    for (auto const & file : fileData)
      TEST(base::DeleteFileX(file), ());
  });

  for (auto const & name : fileData)
  {
    FileWriter writer(name);
    writer.Write(name.c_str(), name.size());
  }

  auto const levels = GetCompressionLevels();
  for (auto const level : levels)
    CreateAndTestZipWithFolder(fileData, fileInArchiveData, "testzip.zip", level);
}
|
||||
|
||||
// Zips a single empty file through the multi-file code path with every compression level.
UNIT_TEST(CreateZip_MultipleFilesSingleEmpty)
{
  std::vector<std::string> const fileData{"singleEmptyfile.txt"};

  // Deletes the input file when the test scope is left.
  SCOPE_GUARD(deleteFileGuard, [&fileData]() { TEST(base::DeleteFileX(fileData[0]), ()); });

  // Constructing a writer without writing anything leaves an empty file.
  {
    FileWriter emptyFile(fileData[0]);
  }

  auto const levels = GetCompressionLevels();
  for (auto const level : levels)
    CreateAndTestZip(fileData, "testzip.zip", level);
}
|
||||
224
libs/coding/coding_tests/zip_reader_test.cpp
Normal file
224
libs/coding/coding_tests/zip_reader_test.cpp
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/file_writer.hpp"
|
||||
#include "coding/zip_reader.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <exception>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
static char const zipBytes[] =
|
||||
"PK\003\004\n\0\0\0\0\0\222\226\342>\302\032"
|
||||
"x\372\005\0\0\0\005\0\0\0\b\0\034\0te"
|
||||
"st.txtUT\t\0\003\303>\017N\017"
|
||||
"?\017Nux\v\0\001\004\365\001\0\0\004P\0"
|
||||
"\0\0Test\nPK\001\002\036\003\n\0\0"
|
||||
"\0\0\0\222\226\342>\302\032x\372\005\0\0\0\005"
|
||||
"\0\0\0\b\0\030\0\0\0\0\0\0\0\0\0\244"
|
||||
"\201\0\0\0\0test.txtUT\005"
|
||||
"\0\003\303>\017Nux\v\0\001\004\365\001\0\0"
|
||||
"\004P\0\0\0PK\005\006\0\0\0\0\001\0\001"
|
||||
"\0N\0\0\0G\0\0\0\0\0";
|
||||
|
||||
// Writes a one-entry archive to disk and checks that its entry can be read, and
// that opening a non-existing archive or a non-existing entry throws.
UNIT_TEST(ZipReaderSmoke)
{
  string const ZIPFILE = "smoke_test.zip";
  {
    FileWriter f(ZIPFILE);
    // "- 1" drops the string literal's terminating NUL.
    f.Write(zipBytes, ARRAY_SIZE(zipBytes) - 1);
  }

  // Returns true if constructing a reader for |entry| inside |zipPath| throws.
  auto const openThrows = [](string const & zipPath, string const & entry)
  {
    try
    {
      ZipFileReader r(zipPath, entry);
    }
    catch (exception const &)
    {
      return true;
    }
    return false;
  };

  // Valid archive and entry: the content must be readable without exceptions.
  bool noException = true;
  try
  {
    ZipFileReader r(ZIPFILE, "test.txt");
    string s;
    r.ReadAsString(s);
    TEST_EQUAL(s, "Test\n", ("Invalid zip file contents"));
  }
  catch (exception const & e)
  {
    noException = false;
    LOG(LERROR, (e.what()));
  }
  TEST(noException, ("Unhandled exception"));

  // invalid zip
  TEST(openThrows("some_nonexisting_filename", "test.txt"), ());

  // invalid file inside zip
  TEST(openThrows(ZIPFILE, "test"), ());

  FileWriter::DeleteFileX(ZIPFILE);
}
|
||||
|
||||
/// zip file with 3 files inside: 1.txt, 2.txt, 3.ttt
|
||||
static char const zipBytes2[] =
|
||||
"\x50\x4b\x3\x4\xa\x0\x0\x0\x0\x0\x92\x6b\xf6\x3e\x53\xfc\x51\x67\x2\x0\x0"
|
||||
"\x0\x2\x0\x0\x0\x5\x0\x1c\x0\x31\x2e\x74\x78\x74\x55\x54\x9\x0\x3\xd3\x50\x29\x4e\xd4\x50\x29\x4e\x75\x78"
|
||||
"\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0\x0\x0\x31\xa\x50\x4b\x3\x4\xa\x0\x0\x0\x0\x0\x95\x6b\xf6\x3e\x90\xaf"
|
||||
"\x7c\x4c\x2\x0\x0\x0\x2\x0\x0\x0\x5\x0\x1c\x0\x32\x2e\x74\x78\x74\x55\x54\x9\x0\x3\xd9\x50\x29\x4e\xd9\x50"
|
||||
"\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0\x0\x0\x32\xa\x50\x4b\x3\x4\xa\x0\x0\x0\x0\x0\x9c\x6b"
|
||||
"\xf6\x3e\xd1\x9e\x67\x55\x2\x0\x0\x0\x2\x0\x0\x0\x5\x0\x1c\x0\x33\x2e\x74\x74\x74\x55\x54\x9\x0\x3\xe8\x50"
|
||||
"\x29\x4e\xe9\x50\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0\x0\x0\x33\xa\x50\x4b\x1\x2\x1e\x3\xa"
|
||||
"\x0\x0\x0\x0\x0\x92\x6b\xf6\x3e\x53\xfc\x51\x67\x2\x0\x0\x0\x2\x0\x0\x0\x5\x0\x18\x0\x0\x0\x0\x0\x1\x0\x0"
|
||||
"\x0\xa4\x81\x0\x0\x0\x0\x31\x2e\x74\x78\x74\x55\x54\x5\x0\x3\xd3\x50\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0"
|
||||
"\x0\x4\x14\x0\x0\x0\x50\x4b\x1\x2\x1e\x3\xa\x0\x0\x0\x0\x0\x95\x6b\xf6\x3e\x90\xaf\x7c\x4c\x2\x0\x0\x0\x2"
|
||||
"\x0\x0\x0\x5\x0\x18\x0\x0\x0\x0\x0\x1\x0\x0\x0\xa4\x81\x41\x0\x0\x0\x32\x2e\x74\x78\x74\x55\x54\x5\x0\x3"
|
||||
"\xd9\x50\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0\x0\x0\x50\x4b\x1\x2\x1e\x3\xa\x0\x0\x0\x0\x0"
|
||||
"\x9c\x6b\xf6\x3e\xd1\x9e\x67\x55\x2\x0\x0\x0\x2\x0\x0\x0\x5\x0\x18\x0\x0\x0\x0\x0\x1\x0\x0\x0\xa4\x81\x82"
|
||||
"\x0\x0\x0\x33\x2e\x74\x74\x74\x55\x54\x5\x0\x3\xe8\x50\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0"
|
||||
"\x0\x0\x50\x4b\x5\x6\x0\x0\x0\x0\x3\x0\x3\x0\xe1\x0\x0\x0\xc3\x0\x0\x0\x0\x0";
|
||||
|
||||
static char const invalidZip[] = "1234567890asdqwetwezxvcbdhg322353tgfsd";
|
||||
|
||||
// Checks ZipFileReader::IsZip and ZipFileReader::FilesList on a valid three-entry
// archive and on a file that is not a zip archive.
UNIT_TEST(ZipFilesList)
{
  string const ZIPFILE = "list_test.zip";
  string const ZIPFILE_INVALID = "invalid_test.zip";

  // "- 1" drops the string literal's terminating NUL.
  {
    FileWriter f(ZIPFILE);
    f.Write(zipBytes2, ARRAY_SIZE(zipBytes2) - 1);
  }
  TEST(ZipFileReader::IsZip(ZIPFILE), ());

  {
    FileWriter f(ZIPFILE_INVALID);
    f.Write(invalidZip, ARRAY_SIZE(invalidZip) - 1);
  }
  TEST(!ZipFileReader::IsZip(ZIPFILE_INVALID), ());

  // The valid archive lists three 2-byte entries in this order.
  try
  {
    char const * const kExpectedNames[] = {"1.txt", "2.txt", "3.ttt"};

    ZipFileReader::FileList files;
    ZipFileReader::FilesList(ZIPFILE, files);

    TEST_EQUAL(files.size(), ARRAY_SIZE(kExpectedNames), ());
    for (size_t i = 0; i < ARRAY_SIZE(kExpectedNames); ++i)
    {
      TEST_EQUAL(files[i].first, kExpectedNames[i], (i));
      TEST_EQUAL(files[i].second, 2, (i));
    }
  }
  catch (exception const & e)
  {
    TEST(false, ("Can't get list of files inside zip", e.what()));
  }

  // Listing the invalid file must throw.
  try
  {
    ZipFileReader::FileList files;
    ZipFileReader::FilesList(ZIPFILE_INVALID, files);
    TEST(false, ("This test shouldn't be reached - exception should be thrown"));
  }
  catch (exception const &)
  {}

  FileWriter::DeleteFileX(ZIPFILE_INVALID);
  FileWriter::DeleteFileX(ZIPFILE);
}
|
||||
|
||||
/// Compressed zip file with 2 files in assets folder:
|
||||
/// assets/aaaaaaaaaa.txt (contains text "aaaaaaaaaa\x0A")
|
||||
/// assets/holalala.txt (contains text "Holalala\x0A")
|
||||
static char const zipBytes3[] =
|
||||
"\x50\x4B\x03\x04\x14\x00\x02\x00\x08\x00\xAF\x96\x56\x40\x42\xE5\x26\x8F\x06\x00"
|
||||
"\x00\x00\x0B\x00\x00\x00\x15\x00\x1C\x00\x61\x73\x73\x65\x74\x73\x2F\x61\x61\x61"
|
||||
"\x61\x61\x61\x61\x61\x61\x61\x2E\x74\x78\x74\x55\x54\x09\x00\x03\x7A\x0F\x45\x4F"
|
||||
"\xD8\x0F\x45\x4F\x75\x78\x0B\x00\x01\x04\xF5\x01\x00\x00\x04\x14\x00\x00\x00\x4B"
|
||||
"\x4C\x84\x01\x2E\x00\x50\x4B\x03\x04\x14\x00\x02\x00\x08\x00\xE6\x96\x56\x40\x5E"
|
||||
"\x76\x90\x07\x08\x00\x00\x00\x09\x00\x00\x00\x13\x00\x1C\x00\x61\x73\x73\x65\x74"
|
||||
"\x73\x2F\x68\x6F\x6C\x61\x6C\x61\x6C\x61\x2E\x74\x78\x74\x55\x54\x09\x00\x03\xDF"
|
||||
"\x0F\x45\x4F\xDC\x0F\x45\x4F\x75\x78\x0B\x00\x01\x04\xF5\x01\x00\x00\x04\x14\x00"
|
||||
"\x00\x00\xF3\xC8\xCF\x49\x04\x41\x2E\x00\x50\x4B\x01\x02\x1E\x03\x14\x00\x02\x00"
|
||||
"\x08\x00\xAF\x96\x56\x40\x42\xE5\x26\x8F\x06\x00\x00\x00\x0B\x00\x00\x00\x15\x00"
|
||||
"\x18\x00\x00\x00\x00\x00\x01\x00\x00\x00\xA4\x81\x00\x00\x00\x00\x61\x73\x73\x65"
|
||||
"\x74\x73\x2F\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x2E\x74\x78\x74\x55\x54\x05"
|
||||
"\x00\x03\x7A\x0F\x45\x4F\x75\x78\x0B\x00\x01\x04\xF5\x01\x00\x00\x04\x14\x00\x00"
|
||||
"\x00\x50\x4B\x01\x02\x1E\x03\x14\x00\x02\x00\x08\x00\xE6\x96\x56\x40\x5E\x76\x90"
|
||||
"\x07\x08\x00\x00\x00\x09\x00\x00\x00\x13\x00\x18\x00\x00\x00\x00\x00\x01\x00\x00"
|
||||
"\x00\xA4\x81\x55\x00\x00\x00\x61\x73\x73\x65\x74\x73\x2F\x68\x6F\x6C\x61\x6C\x61"
|
||||
"\x6C\x61\x2E\x74\x78\x74\x55\x54\x05\x00\x03\xDF\x0F\x45\x4F\x75\x78\x0B\x00\x01"
|
||||
"\x04\xF5\x01\x00\x00\x04\x14\x00\x00\x00\x50\x4B\x05\x06\x00\x00\x00\x00\x02\x00"
|
||||
"\x02\x00\xB4\x00\x00\x00\xAA\x00\x00\x00\x00\x00";
|
||||
|
||||
// Writes the compressed two-entry archive to disk and extracts both entries, one
// after the other, into the same output file, checking the content each time.
UNIT_TEST(ZipExtract)
{
  string const ZIPFILE = "test.zip";
  {
    FileWriter f(ZIPFILE);
    // "- 1" drops the string literal's terminating NUL so that only the archive
    // bytes are written, as in the other tests of this file.
    f.Write(zipBytes3, ARRAY_SIZE(zipBytes3) - 1);
  }
  TEST(ZipFileReader::IsZip(ZIPFILE), ("Not a zip file"));

  ZipFileReader::FileList files;
  ZipFileReader::FilesList(ZIPFILE, files);
  TEST_EQUAL(files.size(), 2, ());

  string const OUTFILE = "out.tmp";
  string s;
  ZipFileReader::UnzipFile(ZIPFILE, files[0].first, OUTFILE);
  {
    FileReader(OUTFILE).ReadAsString(s);
  }
  TEST_EQUAL(s, "aaaaaaaaaa\x0A", ());
  // OUTFILE should be rewritten correctly in the next lines
  ZipFileReader::UnzipFile(ZIPFILE, files[1].first, OUTFILE);
  {
    FileReader(OUTFILE).ReadAsString(s);
  }
  TEST_EQUAL(s, "Holalala\x0A", ());
  FileWriter::DeleteFileX(OUTFILE);

  FileWriter::DeleteFileX(ZIPFILE);
}
|
||||
|
||||
// Writes the compressed two-entry archive to disk and checks the values reported
// by Size() and UncompressedSize() for each entry.
UNIT_TEST(ZipFileSizes)
{
  string const ZIPFILE = "test.zip";
  {
    FileWriter f(ZIPFILE);
    // "- 1" drops the string literal's terminating NUL so that only the archive
    // bytes are written, as in the other tests of this file.
    f.Write(zipBytes3, ARRAY_SIZE(zipBytes3) - 1);
  }
  TEST(ZipFileReader::IsZip(ZIPFILE), ("Not a zip file"));

  ZipFileReader::FileList files;
  ZipFileReader::FilesList(ZIPFILE, files);
  TEST_EQUAL(files.size(), 2, ());

  // First entry: "aaaaaaaaaa\n" is 11 bytes uncompressed.
  {
    ZipFileReader file(ZIPFILE, files[0].first);
    TEST_EQUAL(file.Size(), 6, ());
    TEST_EQUAL(file.UncompressedSize(), 11, ());
  }

  // Second entry: "Holalala\n" is 9 bytes uncompressed.
  {
    ZipFileReader file(ZIPFILE, files[1].first);
    TEST_EQUAL(file.Size(), 8, ());
    TEST_EQUAL(file.UncompressedSize(), 9, ());
  }

  FileWriter::DeleteFileX(ZIPFILE);
}
|
||||
103
libs/coding/coding_tests/zlib_test.cpp
Normal file
103
libs/coding/coding_tests/zlib_test.cpp
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/zlib.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace coding;
|
||||
using namespace std;
|
||||
|
||||
using Deflate = ZLib::Deflate;
|
||||
using Inflate = ZLib::Inflate;
|
||||
|
||||
// All (compression format, decompression format) pairings that are expected
// to round-trip: each Deflate format is paired with the matching Inflate
// format and with Inflate::Format::Both.
pair<Deflate::Format, Inflate::Format> const g_combinations[] = {
    {Deflate::Format::ZLib, Inflate::Format::ZLib},
    {Deflate::Format::ZLib, Inflate::Format::Both},
    {Deflate::Format::GZip, Inflate::Format::GZip},
    {Deflate::Format::GZip, Inflate::Format::Both},
};
|
||||
|
||||
namespace
|
||||
{
|
||||
// Compresses |original| and decompresses the result again for every format
// pairing listed in g_combinations, checking that both steps succeed and that
// the round trip reproduces the input exactly.
void TestDeflateInflate(string const & original)
{
  for (auto const & [deflateFormat, inflateFormat] : g_combinations)
  {
    Deflate const deflate(deflateFormat, Deflate::Level::BestCompression);
    Inflate const inflate(inflateFormat);

    string compressed;
    TEST(deflate(original, back_inserter(compressed)), ());

    string decompressed;
    TEST(inflate(compressed, back_inserter(decompressed)), ());

    TEST_EQUAL(original, decompressed, ());
  }
}
|
||||
|
||||
// Basic sanity checks: null input buffers must be rejected by both the
// compressor and the decompressor, while an empty and a short string must
// survive a compress/decompress round trip.
UNIT_TEST(ZLib_Smoke)
{
  Deflate const deflate(Deflate::Format::ZLib, Deflate::Level::BestCompression);
  Inflate const inflate(Inflate::Format::ZLib);

  // A null data pointer is invalid regardless of the declared size.
  {
    string s;
    TEST(!deflate(nullptr /* data */, 0 /* size */, back_inserter(s) /* out */), ());
    TEST(!deflate(nullptr /* data */, 4 /* size */, back_inserter(s) /* out */), ());
    TEST(!inflate(nullptr /* data */, 0 /* size */, back_inserter(s) /* out */), ());
    TEST(!inflate(nullptr /* data */, 4 /* size */, back_inserter(s) /* out */), ());
  }

  TestDeflateInflate("");
  TestDeflateInflate("Hello, World!");
}
|
||||
|
||||
// Round-trips a longer input: the decimal representations of 0..999
// concatenated into a single string.
UNIT_TEST(ZLib_Large)
{
  size_t constexpr kCount = 1000;

  string original;
  size_t i = 0;
  while (i < kCount)
  {
    original.append(strings::to_string(i));
    ++i;
  }

  TestDeflateInflate(original);
}
|
||||
|
||||
// Decompresses a gzip stream that was produced by an external tool rather
// than by our own Deflate.
UNIT_TEST(GZip_ForeignData)
{
  // The payload is the 13-byte text "Hello, World!". A comparable dump can be
  // obtained with:
  //
  // echo -n 'Hello, World!' | gzip -c | od -t x1
  //
  // Note that the bytes below additionally carry a timestamp and the original
  // file name ("sample.txt") in the gzip header, so a fresh dump will not
  // match them byte for byte.
  uint8_t const data[] = {0x1f, 0x8b, 0x08, 0x08, 0x6d, 0x55, 0x08, 0x59, 0x00, 0x03, 0x73, 0x61, 0x6d, 0x70, 0x6c,
                          0x65, 0x2e, 0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0xd7, 0x51, 0x08, 0xcf,
                          0x2f, 0xca, 0x49, 0x51, 0x04, 0x00, 0xd0, 0xc3, 0x4a, 0xec, 0x0d, 0x00, 0x00, 0x00};

  string s;

  Inflate const inflate(Inflate::Format::GZip);
  TEST(inflate(data, ARRAY_SIZE(data), back_inserter(s)), ());
  TEST_EQUAL(s, "Hello, World!", ());
}
|
||||
|
||||
// Checks how Inflate behaves when the input buffer holds bytes past the end
// of the gzip stream.
UNIT_TEST(GZip_ExtraDataInBuffer)
{
  // Same bytes as in GZip_ForeignData followed by one extra '\n' (0x0a).
  uint8_t const data[] = {0x1f, 0x8b, 0x08, 0x08, 0x6d, 0x55, 0x08, 0x59, 0x00, 0x03, 0x73, 0x61, 0x6d, 0x70, 0x6c,
                          0x65, 0x2e, 0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0xd7, 0x51, 0x08, 0xcf,
                          0x2f, 0xca, 0x49, 0x51, 0x04, 0x00, 0xd0, 0xc3, 0x4a, 0xec, 0x0d, 0x00, 0x00, 0x00, 0x0a};

  string s;

  Inflate const inflate(Inflate::Format::GZip);
  // The call must report failure because unconsumed data is left at the end
  // of the buffer...
  TEST(!inflate(data, ARRAY_SIZE(data), back_inserter(s)), ());
  // ...but everything except the trailing byte must still be decompressed.
  TEST_EQUAL(s, "Hello, World!", ());
}
|
||||
} // namespace
|
||||
487
libs/coding/compressed_bit_vector.cpp
Normal file
487
libs/coding/compressed_bit_vector.cpp
Normal file
|
|
@ -0,0 +1,487 @@
|
|||
#include "coding/compressed_bit_vector.hpp"
|
||||
|
||||
#include "coding/write_to_sink.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
using std::make_unique, std::max, std::min, std::unique_ptr, std::vector;
|
||||
|
||||
namespace
|
||||
{
|
||||
struct IntersectOp
|
||||
{
|
||||
IntersectOp() {}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::DenseCBV const & b) const
|
||||
{
|
||||
size_t const sizeA = a.NumBitGroups();
|
||||
size_t const sizeB = b.NumBitGroups();
|
||||
vector<uint64_t> resGroups(min(sizeA, sizeB));
|
||||
for (size_t i = 0; i < resGroups.size(); ++i)
|
||||
resGroups[i] = a.GetBitGroup(i) & b.GetBitGroup(i);
|
||||
return coding::CompressedBitVectorBuilder::FromBitGroups(std::move(resGroups));
|
||||
}
|
||||
|
||||
// The intersection of dense and sparse is always sparse.
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::SparseCBV const & b) const
|
||||
{
|
||||
vector<uint64_t> resPos;
|
||||
for (size_t i = 0; i < b.PopCount(); ++i)
|
||||
{
|
||||
auto pos = b.Select(i);
|
||||
if (a.GetBit(pos))
|
||||
resPos.push_back(pos);
|
||||
}
|
||||
return make_unique<coding::SparseCBV>(std::move(resPos));
|
||||
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::DenseCBV const & b) const
|
||||
{
|
||||
return operator()(b, a);
|
||||
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::SparseCBV const & b) const
|
||||
{
|
||||
vector<uint64_t> resPos;
|
||||
set_intersection(a.Begin(), a.End(), b.Begin(), b.End(), back_inserter(resPos));
|
||||
return make_unique<coding::SparseCBV>(std::move(resPos));
|
||||
}
|
||||
};
|
||||
|
||||
struct SubtractOp
|
||||
{
|
||||
SubtractOp() {}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::DenseCBV const & b) const
|
||||
{
|
||||
size_t const sizeA = a.NumBitGroups();
|
||||
size_t const sizeB = b.NumBitGroups();
|
||||
vector<uint64_t> resGroups(min(sizeA, sizeB));
|
||||
for (size_t i = 0; i < resGroups.size(); ++i)
|
||||
resGroups[i] = a.GetBitGroup(i) & ~b.GetBitGroup(i);
|
||||
return CompressedBitVectorBuilder::FromBitGroups(std::move(resGroups));
|
||||
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::SparseCBV const & b) const
|
||||
{
|
||||
vector<uint64_t> resGroups(a.NumBitGroups());
|
||||
|
||||
size_t i = 0;
|
||||
auto j = b.Begin();
|
||||
for (; i < resGroups.size() && j < b.End(); ++i)
|
||||
{
|
||||
uint64_t const kBitsBegin = i * DenseCBV::kBlockSize;
|
||||
uint64_t const kBitsEnd = (i + 1) * DenseCBV::kBlockSize;
|
||||
|
||||
uint64_t mask = 0;
|
||||
for (; j < b.End() && *j < kBitsEnd; ++j)
|
||||
{
|
||||
ASSERT_GREATER_OR_EQUAL(*j, kBitsBegin, ());
|
||||
mask |= static_cast<uint64_t>(1) << (*j - kBitsBegin);
|
||||
}
|
||||
|
||||
resGroups[i] = a.GetBitGroup(i) & ~mask;
|
||||
}
|
||||
|
||||
for (; i < resGroups.size(); ++i)
|
||||
resGroups[i] = a.GetBitGroup(i);
|
||||
|
||||
return CompressedBitVectorBuilder::FromBitGroups(std::move(resGroups));
|
||||
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::DenseCBV const & b) const
|
||||
{
|
||||
vector<uint64_t> resPos;
|
||||
copy_if(a.Begin(), a.End(), back_inserter(resPos), [&](uint64_t bit) { return !b.GetBit(bit); });
|
||||
return CompressedBitVectorBuilder::FromBitPositions(std::move(resPos));
|
||||
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::SparseCBV const & b) const
|
||||
{
|
||||
vector<uint64_t> resPos;
|
||||
set_difference(a.Begin(), a.End(), b.Begin(), b.End(), back_inserter(resPos));
|
||||
return CompressedBitVectorBuilder::FromBitPositions(std::move(resPos));
|
||||
}
|
||||
};
|
||||
|
||||
struct UnionOp
|
||||
{
|
||||
UnionOp() {}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::DenseCBV const & b) const
|
||||
{
|
||||
size_t const sizeA = a.NumBitGroups();
|
||||
size_t const sizeB = b.NumBitGroups();
|
||||
|
||||
size_t commonSize = min(sizeA, sizeB);
|
||||
size_t resultSize = max(sizeA, sizeB);
|
||||
vector<uint64_t> resGroups(resultSize);
|
||||
for (size_t i = 0; i < commonSize; ++i)
|
||||
resGroups[i] = a.GetBitGroup(i) | b.GetBitGroup(i);
|
||||
if (a.NumBitGroups() == resultSize)
|
||||
for (size_t i = commonSize; i < resultSize; ++i)
|
||||
resGroups[i] = a.GetBitGroup(i);
|
||||
else
|
||||
for (size_t i = commonSize; i < resultSize; ++i)
|
||||
resGroups[i] = b.GetBitGroup(i);
|
||||
return CompressedBitVectorBuilder::FromBitGroups(std::move(resGroups));
|
||||
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::SparseCBV const & b) const
|
||||
{
|
||||
size_t const sizeA = a.NumBitGroups();
|
||||
size_t const sizeB =
|
||||
b.PopCount() == 0
|
||||
? 0
|
||||
: static_cast<size_t>((b.Select(static_cast<size_t>(b.PopCount() - 1)) + DenseCBV::kBlockSize - 1) /
|
||||
DenseCBV::kBlockSize);
|
||||
if (sizeB > sizeA)
|
||||
{
|
||||
vector<uint64_t> resPos;
|
||||
auto j = b.Begin();
|
||||
auto merge = [&](uint64_t va)
|
||||
{
|
||||
while (j < b.End() && *j < va)
|
||||
{
|
||||
resPos.push_back(*j);
|
||||
++j;
|
||||
}
|
||||
resPos.push_back(va);
|
||||
};
|
||||
a.ForEach(merge);
|
||||
for (; j < b.End(); ++j)
|
||||
resPos.push_back(*j);
|
||||
return CompressedBitVectorBuilder::FromBitPositions(std::move(resPos));
|
||||
}
|
||||
|
||||
vector<uint64_t> resGroups(sizeA);
|
||||
|
||||
size_t i = 0;
|
||||
auto j = b.Begin();
|
||||
for (; i < sizeA || j < b.End(); ++i)
|
||||
{
|
||||
uint64_t const kBitsBegin = i * DenseCBV::kBlockSize;
|
||||
uint64_t const kBitsEnd = (i + 1) * DenseCBV::kBlockSize;
|
||||
|
||||
uint64_t mask = i < sizeA ? a.GetBitGroup(i) : 0;
|
||||
for (; j < b.End() && *j < kBitsEnd; ++j)
|
||||
{
|
||||
ASSERT_GREATER_OR_EQUAL(*j, kBitsBegin, ());
|
||||
mask |= static_cast<uint64_t>(1) << (*j - kBitsBegin);
|
||||
}
|
||||
|
||||
resGroups[i] = mask;
|
||||
}
|
||||
|
||||
return CompressedBitVectorBuilder::FromBitGroups(std::move(resGroups));
|
||||
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::DenseCBV const & b) const
|
||||
{
|
||||
return operator()(b, a);
|
||||
}
|
||||
|
||||
unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::SparseCBV const & b) const
|
||||
{
|
||||
vector<uint64_t> resPos;
|
||||
set_union(a.Begin(), a.End(), b.Begin(), b.End(), back_inserter(resPos));
|
||||
return CompressedBitVectorBuilder::FromBitPositions(std::move(resPos));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename TBinaryOp>
|
||||
unique_ptr<coding::CompressedBitVector> Apply(TBinaryOp const & op, CompressedBitVector const & lhs,
|
||||
CompressedBitVector const & rhs)
|
||||
{
|
||||
using strat = CompressedBitVector::StorageStrategy;
|
||||
auto const stratA = lhs.GetStorageStrategy();
|
||||
auto const stratB = rhs.GetStorageStrategy();
|
||||
if (stratA == strat::Dense && stratB == strat::Dense)
|
||||
{
|
||||
DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
|
||||
DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
|
||||
return op(a, b);
|
||||
}
|
||||
if (stratA == strat::Dense && stratB == strat::Sparse)
|
||||
{
|
||||
DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
|
||||
SparseCBV const & b = static_cast<SparseCBV const &>(rhs);
|
||||
return op(a, b);
|
||||
}
|
||||
if (stratA == strat::Sparse && stratB == strat::Dense)
|
||||
{
|
||||
SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
|
||||
DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
|
||||
return op(a, b);
|
||||
}
|
||||
if (stratA == strat::Sparse && stratB == strat::Sparse)
|
||||
{
|
||||
SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
|
||||
SparseCBV const & b = static_cast<SparseCBV const &>(rhs);
|
||||
return op(a, b);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Decides whether a bit vector with |popCount| set bits out of |totalBits|
// should be stored as a DenseCBV. Only the overall density is considered:
// irregularities in the distribution of bits are ignored and, in particular,
// the vector is not split into separately stored blocks (although that might
// turn out to be a good idea).
bool DenseEnough(uint64_t popCount, uint64_t totalBits)
{
  // Settle at 30% for now, expressed as the fraction 3/10 to stay in
  // integer arithmetic.
  uint64_t constexpr kThresholdNumerator = 3;
  uint64_t constexpr kThresholdDenominator = 10;

  uint64_t const scaledPopCount = popCount * kThresholdDenominator;
  uint64_t const scaledTotal = totalBits * kThresholdNumerator;
  return scaledPopCount >= scaledTotal;
}
|
||||
|
||||
template <typename TBitPositions>
|
||||
unique_ptr<CompressedBitVector> BuildFromBitPositions(TBitPositions && setBits)
|
||||
{
|
||||
if (setBits.empty())
|
||||
return make_unique<SparseCBV>(std::forward<TBitPositions>(setBits));
|
||||
uint64_t const maxBit = *max_element(setBits.begin(), setBits.end());
|
||||
|
||||
if (DenseEnough(setBits.size(), maxBit))
|
||||
return make_unique<DenseCBV>(std::forward<TBitPositions>(setBits));
|
||||
|
||||
return make_unique<SparseCBV>(std::forward<TBitPositions>(setBits));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// static
|
||||
uint64_t const DenseCBV::kBlockSize;
|
||||
|
||||
// Builds a dense bit vector from the positions of its set bits. The bitmap
// is sized to hold the largest position. Note that the population count is
// taken to be the number of elements of |setBits|, so a repeated position
// would be counted more than once - presumably callers pass unique positions.
DenseCBV::DenseCBV(vector<uint64_t> const & setBits)
{
  if (setBits.empty())
    return;

  uint64_t const maxBit = *std::max_element(setBits.begin(), setBits.end());
  m_bitGroups.resize(static_cast<size_t>(maxBit / kBlockSize) + 1);
  m_popCount = static_cast<uint64_t>(setBits.size());

  for (uint64_t const pos : setBits)
  {
    auto const groupIndex = static_cast<size_t>(pos / kBlockSize);
    uint64_t const bitMask = static_cast<uint64_t>(1) << (pos % kBlockSize);
    m_bitGroups[groupIndex] |= bitMask;
  }
}
|
||||
|
||||
// static
|
||||
unique_ptr<DenseCBV> DenseCBV::BuildFromBitGroups(vector<uint64_t> && bitGroups)
|
||||
{
|
||||
unique_ptr<DenseCBV> cbv(new DenseCBV());
|
||||
cbv->m_popCount = 0;
|
||||
for (size_t i = 0; i < bitGroups.size(); ++i)
|
||||
cbv->m_popCount += std::popcount(bitGroups[i]);
|
||||
cbv->m_bitGroups = std::move(bitGroups);
|
||||
return cbv;
|
||||
}
|
||||
|
||||
uint64_t DenseCBV::GetBitGroup(size_t i) const
|
||||
{
|
||||
return i < m_bitGroups.size() ? m_bitGroups[i] : 0;
|
||||
}
|
||||
|
||||
uint64_t DenseCBV::PopCount() const
|
||||
{
|
||||
return m_popCount;
|
||||
}
|
||||
|
||||
bool DenseCBV::GetBit(uint64_t pos) const
|
||||
{
|
||||
uint64_t bitGroup = GetBitGroup(static_cast<size_t>(pos / kBlockSize));
|
||||
return ((bitGroup >> (pos % kBlockSize)) & 1) > 0;
|
||||
}
|
||||
|
||||
// Returns a bit vector that contains only the |n| lowest set bits of this
// one, or a plain copy when there are no more than |n| set bits.
unique_ptr<CompressedBitVector> DenseCBV::LeaveFirstSetNBits(uint64_t n) const
{
  if (PopCount() <= n)
    return Clone();

  vector<uint64_t> groups;
  for (uint64_t group : m_bitGroups)
  {
    if (n == 0)
      break;

    uint32_t const bits = std::popcount(group);
    if (bits <= n)
    {
      // The whole group fits into the remaining budget.
      groups.push_back(group);
      n -= bits;
      continue;
    }

    // Only part of the group fits: move its lowest set bits into |part| one
    // by one until the budget is used up.
    uint64_t part = 0;
    for (; n != 0; --n)
    {
      part |= group & -group;  // Isolate the lowest set bit.
      group &= group - 1;      // Clear the lowest set bit.
    }
    groups.push_back(part);
  }

  return CompressedBitVectorBuilder::FromBitGroups(std::move(groups));
}
|
||||
|
||||
// Identifies this implementation as the dense one.
CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const
{
  return StorageStrategy::Dense;
}
|
||||
|
||||
// Writes a one-byte header holding the storage strategy followed by the
// vector of 64-bit groups.
void DenseCBV::Serialize(Writer & writer) const
{
  uint8_t header = static_cast<uint8_t>(StorageStrategy::Dense);
  WriteToSink(writer, header);

  rw::WriteVectorOfPOD(writer, m_bitGroups);
}
|
||||
|
||||
// Returns a copy of this bit vector: a freshly constructed DenseCBV that
// receives copies of the bitmap and of the cached population count.
unique_ptr<CompressedBitVector> DenseCBV::Clone() const
{
  auto copy = make_unique<DenseCBV>();
  copy->m_popCount = m_popCount;
  copy->m_bitGroups = m_bitGroups;
  return copy;
}
|
||||
|
||||
// Copies |setBits| as the list of set positions. The positions are expected
// to be sorted in non-decreasing order, which is checked with an assertion.
SparseCBV::SparseCBV(vector<uint64_t> const & setBits)
  : m_positions(setBits)
{
  ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
}
|
||||
|
||||
// Takes ownership of |setBits| as the list of set positions. The positions
// are expected to be sorted in non-decreasing order, which is checked with an
// assertion.
SparseCBV::SparseCBV(vector<uint64_t> && setBits)
  : m_positions(std::move(setBits))
{
  ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
}
|
||||
|
||||
uint64_t SparseCBV::Select(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, m_positions.size(), ());
|
||||
return m_positions[i];
|
||||
}
|
||||
|
||||
uint64_t SparseCBV::PopCount() const
|
||||
{
|
||||
return m_positions.size();
|
||||
}
|
||||
|
||||
bool SparseCBV::GetBit(uint64_t pos) const
|
||||
{
|
||||
return binary_search(m_positions.begin(), m_positions.end(), pos);
|
||||
}
|
||||
|
||||
// Returns a bit vector built from the first |n| stored positions, or a plain
// copy when there are no more than |n| set bits.
unique_ptr<CompressedBitVector> SparseCBV::LeaveFirstSetNBits(uint64_t n) const
{
  if (PopCount() <= n)
    return Clone();

  auto const first = m_positions.begin();
  auto const last = first + static_cast<size_t>(n);
  vector<uint64_t> positions(first, last);
  return CompressedBitVectorBuilder::FromBitPositions(std::move(positions));
}
|
||||
|
||||
// Identifies this implementation as the sparse one.
CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const
{
  return StorageStrategy::Sparse;
}
|
||||
|
||||
// Writes a one-byte header holding the storage strategy followed by the
// vector of set positions.
void SparseCBV::Serialize(Writer & writer) const
{
  uint8_t header = static_cast<uint8_t>(StorageStrategy::Sparse);
  WriteToSink(writer, header);

  rw::WriteVectorOfPOD(writer, m_positions);
}
|
||||
|
||||
// Returns a copy of this bit vector: a freshly constructed SparseCBV that
// receives a copy of the position list.
unique_ptr<CompressedBitVector> SparseCBV::Clone() const
{
  auto copy = make_unique<SparseCBV>();
  copy->m_positions = m_positions;
  return copy;
}
|
||||
|
||||
// static
|
||||
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitPositions(vector<uint64_t> const & setBits)
|
||||
{
|
||||
return BuildFromBitPositions(setBits);
|
||||
}
|
||||
|
||||
// static
|
||||
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitPositions(vector<uint64_t> && setBits)
|
||||
{
|
||||
return BuildFromBitPositions(std::move(setBits));
|
||||
}
|
||||
|
||||
// static
|
||||
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups(vector<uint64_t> && bitGroups)
|
||||
{
|
||||
static uint64_t constexpr kBlockSize = DenseCBV::kBlockSize;
|
||||
|
||||
while (!bitGroups.empty() && bitGroups.back() == 0)
|
||||
bitGroups.pop_back();
|
||||
if (bitGroups.empty())
|
||||
return make_unique<SparseCBV>(std::move(bitGroups));
|
||||
|
||||
uint64_t const maxBit = kBlockSize * (bitGroups.size() - 1) + bits::FloorLog(bitGroups.back());
|
||||
uint64_t popCount = 0;
|
||||
for (size_t i = 0; i < bitGroups.size(); ++i)
|
||||
popCount += std::popcount(bitGroups[i]);
|
||||
|
||||
if (DenseEnough(popCount, maxBit))
|
||||
return DenseCBV::BuildFromBitGroups(std::move(bitGroups));
|
||||
|
||||
vector<uint64_t> setBits;
|
||||
for (size_t i = 0; i < bitGroups.size(); ++i)
|
||||
{
|
||||
for (size_t j = 0; j < kBlockSize; ++j)
|
||||
if (((bitGroups[i] >> j) & 1) > 0)
|
||||
setBits.push_back(kBlockSize * i + j);
|
||||
}
|
||||
return make_unique<SparseCBV>(setBits);
|
||||
}
|
||||
|
||||
std::string DebugPrint(CompressedBitVector::StorageStrategy strat)
|
||||
{
|
||||
switch (strat)
|
||||
{
|
||||
case CompressedBitVector::StorageStrategy::Dense: return "Dense";
|
||||
case CompressedBitVector::StorageStrategy::Sparse: return "Sparse";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// static
|
||||
unique_ptr<CompressedBitVector> CompressedBitVector::Intersect(CompressedBitVector const & lhs,
|
||||
CompressedBitVector const & rhs)
|
||||
{
|
||||
static IntersectOp const op;
|
||||
return Apply(op, lhs, rhs);
|
||||
}
|
||||
|
||||
// static
|
||||
unique_ptr<CompressedBitVector> CompressedBitVector::Subtract(CompressedBitVector const & lhs,
|
||||
CompressedBitVector const & rhs)
|
||||
{
|
||||
static SubtractOp const op;
|
||||
return Apply(op, lhs, rhs);
|
||||
}
|
||||
|
||||
// static
|
||||
unique_ptr<CompressedBitVector> CompressedBitVector::Union(CompressedBitVector const & lhs,
|
||||
CompressedBitVector const & rhs)
|
||||
{
|
||||
static UnionOp const op;
|
||||
return Apply(op, lhs, rhs);
|
||||
}
|
||||
|
||||
// static
|
||||
bool CompressedBitVector::IsEmpty(unique_ptr<CompressedBitVector> const & cbv)
|
||||
{
|
||||
return !cbv || cbv->PopCount() == 0;
|
||||
}
|
||||
|
||||
// static
|
||||
bool CompressedBitVector::IsEmpty(CompressedBitVector const * cbv)
|
||||
{
|
||||
return !cbv || cbv->PopCount() == 0;
|
||||
}
|
||||
} // namespace coding
|
||||
265
libs/coding/compressed_bit_vector.hpp
Normal file
265
libs/coding/compressed_bit_vector.hpp
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/read_write_utils.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/control_flow.hpp"
|
||||
#include "base/ref_counted.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
class CompressedBitVector : public base::RefCounted
|
||||
{
|
||||
public:
|
||||
enum class StorageStrategy
|
||||
{
|
||||
Dense,
|
||||
Sparse
|
||||
};
|
||||
|
||||
virtual ~CompressedBitVector() = default;
|
||||
|
||||
// Intersects two bit vectors.
|
||||
// todo(@pimenov) We expect the common use case to be as follows.
|
||||
// A CBV is created in memory and several CBVs are read and intersected
|
||||
// with it one by one. The in-memory CBV may initially contain a bit
|
||||
// for every feature in an mwm and the intersected CBVs are read from
|
||||
// the leaves of a search trie.
|
||||
// Therefore an optimization of Intersect comes to mind: make a wrapper
|
||||
// around TReader that will read a representation of a CBV from disk
|
||||
// and intersect it bit by bit with the global in-memory CBV bypassing such
|
||||
// routines as allocating memory and choosing strategy. They all can be called only
|
||||
// once, namely in the end, when it is needed to pack the in-memory CBV into
|
||||
// a suitable representation and pass it to the caller.
|
||||
static std::unique_ptr<CompressedBitVector> Intersect(CompressedBitVector const & lhs,
|
||||
CompressedBitVector const & rhs);
|
||||
|
||||
// Subtracts two bit vectors.
|
||||
static std::unique_ptr<CompressedBitVector> Subtract(CompressedBitVector const & lhs,
|
||||
CompressedBitVector const & rhs);
|
||||
|
||||
// Unites two bit vectors.
|
||||
static std::unique_ptr<CompressedBitVector> Union(CompressedBitVector const & lhs, CompressedBitVector const & rhs);
|
||||
|
||||
static bool IsEmpty(std::unique_ptr<CompressedBitVector> const & cbv);
|
||||
|
||||
static bool IsEmpty(CompressedBitVector const * cbv);
|
||||
|
||||
// Returns the number of set bits (population count).
|
||||
virtual uint64_t PopCount() const = 0;
|
||||
|
||||
// todo(@pimenov) How long will 32 bits be enough here?
|
||||
// Would operator[] look better?
|
||||
virtual bool GetBit(uint64_t pos) const = 0;
|
||||
|
||||
// Returns a subset of the current bit vector with first
|
||||
// min(PopCount(), |n|) set bits.
|
||||
virtual std::unique_ptr<CompressedBitVector> LeaveFirstSetNBits(uint64_t n) const = 0;
|
||||
|
||||
// Returns the strategy used when storing this bit vector.
|
||||
virtual StorageStrategy GetStorageStrategy() const = 0;
|
||||
|
||||
// Writes the contents of a bit vector to writer.
|
||||
// The first byte is always the header that defines the format.
|
||||
// Currently the header is 0 or 1 for Dense and Sparse strategies respectively.
|
||||
// It is easier to dispatch via virtual method calls and not bother
|
||||
// with template TWriters here as we do in similar places in our code.
|
||||
// This should not pose too much a problem because commonly
|
||||
// used writers are inhereted from Writer anyway.
|
||||
// todo(@pimenov). Think about rewriting Serialize and Deserialize to use the
|
||||
// code in old_compressed_bit_vector.{c,h}pp.
|
||||
virtual void Serialize(Writer & writer) const = 0;
|
||||
|
||||
// Copies a bit vector and returns a pointer to the copy.
|
||||
virtual std::unique_ptr<CompressedBitVector> Clone() const = 0;
|
||||
};
|
||||
|
||||
std::string DebugPrint(CompressedBitVector::StorageStrategy strat);
|
||||
|
||||
class DenseCBV : public CompressedBitVector
|
||||
{
|
||||
public:
|
||||
friend class CompressedBitVectorBuilder;
|
||||
static uint64_t constexpr kBlockSize = 64;
|
||||
|
||||
DenseCBV() = default;
|
||||
|
||||
// Builds a dense CBV from a list of positions of set bits.
|
||||
explicit DenseCBV(std::vector<uint64_t> const & setBits);
|
||||
|
||||
// Not to be confused with the constructor: the semantics
|
||||
// of the array of integers is completely different.
|
||||
static std::unique_ptr<DenseCBV> BuildFromBitGroups(std::vector<uint64_t> && bitGroups);
|
||||
|
||||
size_t NumBitGroups() const { return m_bitGroups.size(); }
|
||||
|
||||
template <typename Fn>
|
||||
void ForEach(Fn && f) const
|
||||
{
|
||||
base::ControlFlowWrapper<Fn> wrapper(std::forward<Fn>(f));
|
||||
for (size_t i = 0; i < m_bitGroups.size(); ++i)
|
||||
{
|
||||
for (size_t j = 0; j < kBlockSize; ++j)
|
||||
{
|
||||
if (((m_bitGroups[i] >> j) & 1) > 0)
|
||||
{
|
||||
if (wrapper(kBlockSize * i + j) == base::ControlFlow::Break)
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns 0 if the group number is too large to be contained in m_bits.
|
||||
uint64_t GetBitGroup(size_t i) const;
|
||||
|
||||
// CompressedBitVector overrides:
|
||||
uint64_t PopCount() const override;
|
||||
bool GetBit(uint64_t pos) const override;
|
||||
std::unique_ptr<CompressedBitVector> LeaveFirstSetNBits(uint64_t n) const override;
|
||||
StorageStrategy GetStorageStrategy() const override;
|
||||
void Serialize(Writer & writer) const override;
|
||||
std::unique_ptr<CompressedBitVector> Clone() const override;
|
||||
|
||||
private:
|
||||
std::vector<uint64_t> m_bitGroups;
|
||||
uint64_t m_popCount = 0;
|
||||
};
|
||||
|
||||
class SparseCBV : public CompressedBitVector
|
||||
{
|
||||
public:
|
||||
friend class CompressedBitVectorBuilder;
|
||||
using TIterator = std::vector<uint64_t>::const_iterator;
|
||||
|
||||
SparseCBV() = default;
|
||||
|
||||
explicit SparseCBV(std::vector<uint64_t> const & setBits);
|
||||
|
||||
explicit SparseCBV(std::vector<uint64_t> && setBits);
|
||||
|
||||
// Returns the position of the i'th set bit.
|
||||
uint64_t Select(size_t i) const;
|
||||
|
||||
template <typename Fn>
|
||||
void ForEach(Fn && f) const
|
||||
{
|
||||
base::ControlFlowWrapper<Fn> wrapper(std::forward<Fn>(f));
|
||||
for (auto const & position : m_positions)
|
||||
if (wrapper(position) == base::ControlFlow::Break)
|
||||
return;
|
||||
}
|
||||
|
||||
// CompressedBitVector overrides:
|
||||
uint64_t PopCount() const override;
|
||||
bool GetBit(uint64_t pos) const override;
|
||||
std::unique_ptr<CompressedBitVector> LeaveFirstSetNBits(uint64_t n) const override;
|
||||
StorageStrategy GetStorageStrategy() const override;
|
||||
void Serialize(Writer & writer) const override;
|
||||
std::unique_ptr<CompressedBitVector> Clone() const override;
|
||||
|
||||
inline TIterator Begin() const { return m_positions.cbegin(); }
|
||||
inline TIterator End() const { return m_positions.cend(); }
|
||||
|
||||
private:
|
||||
// 0-based positions of the set bits.
|
||||
std::vector<uint64_t> m_positions;
|
||||
};
|
||||
|
||||
class CompressedBitVectorBuilder
|
||||
{
|
||||
public:
|
||||
// Chooses a strategy to store the bit vector with bits from setBits set to one
|
||||
// and returns a pointer to a class that fits best.
|
||||
static std::unique_ptr<CompressedBitVector> FromBitPositions(std::vector<uint64_t> const & setBits);
|
||||
static std::unique_ptr<CompressedBitVector> FromBitPositions(std::vector<uint64_t> && setBits);
|
||||
|
||||
// Chooses a strategy to store the bit vector with bits from a bitmap obtained
|
||||
// by concatenating the elements of bitGroups.
|
||||
static std::unique_ptr<CompressedBitVector> FromBitGroups(std::vector<uint64_t> & bitGroups);
|
||||
static std::unique_ptr<CompressedBitVector> FromBitGroups(std::vector<uint64_t> && bitGroups);
|
||||
|
||||
// Reads a bit vector from reader which must contain a valid
|
||||
// bit vector representation (see CompressedBitVector::Serialize for the format).
|
||||
template <typename TReader>
|
||||
static std::unique_ptr<CompressedBitVector> DeserializeFromReader(TReader & reader)
|
||||
{
|
||||
ReaderSource<TReader> src(reader);
|
||||
return DeserializeFromSource(src);
|
||||
}
|
||||
|
||||
// Reads a bit vector from source which must contain a valid
|
||||
// bit vector representation (see CompressedBitVector::Serialize for the format).
|
||||
template <typename TSource>
|
||||
static std::unique_ptr<CompressedBitVector> DeserializeFromSource(TSource & src)
|
||||
{
|
||||
uint8_t header = ReadPrimitiveFromSource<uint8_t>(src);
|
||||
CompressedBitVector::StorageStrategy strat = static_cast<CompressedBitVector::StorageStrategy>(header);
|
||||
switch (strat)
|
||||
{
|
||||
case CompressedBitVector::StorageStrategy::Dense:
|
||||
{
|
||||
std::vector<uint64_t> bitGroups;
|
||||
rw::ReadVectorOfPOD(src, bitGroups);
|
||||
return DenseCBV::BuildFromBitGroups(std::move(bitGroups));
|
||||
}
|
||||
case CompressedBitVector::StorageStrategy::Sparse:
|
||||
{
|
||||
std::vector<uint64_t> setBits;
|
||||
rw::ReadVectorOfPOD(src, setBits);
|
||||
return std::make_unique<SparseCBV>(std::move(setBits));
|
||||
}
|
||||
}
|
||||
return std::unique_ptr<CompressedBitVector>();
|
||||
}
|
||||
};
|
||||
|
||||
// ForEach is generic and therefore cannot be virtual: a helper class is needed.
|
||||
class CompressedBitVectorEnumerator
|
||||
{
|
||||
public:
|
||||
// Executes f for each bit that is set to one using
|
||||
// the bit's 0-based position as argument.
|
||||
template <typename Fn>
|
||||
static void ForEach(CompressedBitVector const & cbv, Fn && f)
|
||||
{
|
||||
CompressedBitVector::StorageStrategy strat = cbv.GetStorageStrategy();
|
||||
switch (strat)
|
||||
{
|
||||
case CompressedBitVector::StorageStrategy::Dense:
|
||||
{
|
||||
DenseCBV const & denseCBV = static_cast<DenseCBV const &>(cbv);
|
||||
denseCBV.ForEach(f);
|
||||
return;
|
||||
}
|
||||
case CompressedBitVector::StorageStrategy::Sparse:
|
||||
{
|
||||
SparseCBV const & sparseCBV = static_cast<SparseCBV const &>(cbv);
|
||||
sparseCBV.ForEach(f);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class CompressedBitVectorHasher
|
||||
{
|
||||
public:
|
||||
static uint64_t Hash(CompressedBitVector const & cbv)
|
||||
{
|
||||
static constexpr uint64_t kBase = 127;
|
||||
uint64_t hash = 0;
|
||||
CompressedBitVectorEnumerator::ForEach(cbv, [&hash](uint64_t i) { hash = hash * kBase + i + 1; });
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
} // namespace coding
|
||||
4
libs/coding/constants.hpp
Normal file
4
libs/coding/constants.hpp
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
#pragma once
|
||||
|
||||
// size_t is declared in <cstddef>; include it here so that this header does
// not depend on what its includers happened to pull in before it.
#include <cstddef>

// Buffer size, in bytes, for reading files (512 KiB).
// inline constexpr gives the whole program a single definition instead of
// one internal-linkage copy per translation unit.
inline constexpr std::size_t READ_FILE_BUFFER_SIZE = 512 * 1024;

// Buffer size, in bytes, for processing zip files (64 KiB).
inline constexpr unsigned int ZIP_FILE_BUFFER_SIZE = 64 * 1024;
|
||||
163
libs/coding/csv_reader.cpp
Normal file
163
libs/coding/csv_reader.cpp
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
#include "coding/csv_reader.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
namespace coding
|
||||
{
|
||||
// Main constructor that the other ones delegate to. Takes ownership of
// |reader| and, when |hasHeader| is set, immediately reads the first row and
// stores it as the header. The header stays empty if no row could be read.
CSVReader::CSVReader(std::unique_ptr<ReaderInterface> reader, bool hasHeader, char delimiter)
  : m_reader(std::move(reader))
  , m_hasHeader(hasHeader)
  , m_delimiter(delimiter)
{
  if (HasHeader())
  {
    if (auto const row = ReadRow())
      m_header = *row;
  }
}
|
||||
|
||||
// Reads CSV data from the file called |filename| through a DefaultReader.
CSVReader::CSVReader(std::string const & filename, bool hasHeader, char delimiter)
  : CSVReader(std::make_unique<DefaultReader>(filename), hasHeader, delimiter)
{
}
|
||||
|
||||
// Reads CSV data from |stream| through an IstreamWrapper. Only a reference
// to the stream is kept, so it has to outlive the reader.
CSVReader::CSVReader(std::istream & stream, bool hasHeader, char delimiter)
  : CSVReader(std::make_unique<IstreamWrapper>(stream), hasHeader, delimiter)
{
}
|
||||
|
||||
// Reads CSV data from |reader| through a ReaderWrapper.
CSVReader::CSVReader(Reader const & reader, bool hasHeader, char delimiter)
  : CSVReader(std::make_unique<ReaderWrapper>(reader), hasHeader, delimiter)
{
}
|
||||
|
||||
bool CSVReader::HasHeader() const
|
||||
{
|
||||
return m_hasHeader;
|
||||
}
|
||||
|
||||
char CSVReader::GetDelimiter() const
|
||||
{
|
||||
return m_delimiter;
|
||||
}
|
||||
|
||||
// Returns the stored header row. It is empty unless the constructor read one.
CSVReader::Row const & CSVReader::GetHeader() const { return m_header; }
|
||||
|
||||
// Reads rows until ReadRow() reports the end of input and returns copies of
// all of them, in reading order.
CSVReader::Rows CSVReader::ReadAll()
{
  Rows rows;
  while (auto const row = ReadRow())
    rows.emplace_back(*row);
  return rows;
}
|
||||
|
||||
std::optional<CSVReader::Row> CSVReader::ReadRow()
|
||||
{
|
||||
auto const line = m_reader->ReadLine();
|
||||
if (!line)
|
||||
return {};
|
||||
|
||||
Row row;
|
||||
strings::ParseCSVRow(*line, m_delimiter, row);
|
||||
++m_currentLine;
|
||||
return row;
|
||||
}
|
||||
|
||||
size_t CSVReader::GetCurrentLineNumber() const
|
||||
{
|
||||
return m_currentLine;
|
||||
}
|
||||
|
||||
// Stores a reference to |stream|; the stream is neither copied nor owned.
CSVReader::IstreamWrapper::IstreamWrapper(std::istream & stream)
  : m_stream(stream)
{
}
|
||||
|
||||
std::optional<std::string> CSVReader::IstreamWrapper::ReadLine()
|
||||
{
|
||||
std::string line;
|
||||
return std::getline(m_stream, line) ? line : std::optional<std::string>();
|
||||
}
|
||||
|
||||
// Stores a reference to |reader|; the reader is neither copied nor owned.
CSVReader::ReaderWrapper::ReaderWrapper(Reader const & reader)
  : m_reader(reader)
{
}
|
||||
|
||||
std::optional<std::string> CSVReader::ReaderWrapper::ReadLine()
|
||||
{
|
||||
std::vector<char> line;
|
||||
char ch = '\0';
|
||||
while (m_pos < m_reader.Size() && ch != '\n')
|
||||
{
|
||||
m_reader.Read(m_pos, &ch, sizeof(ch));
|
||||
line.emplace_back(ch);
|
||||
++m_pos;
|
||||
}
|
||||
|
||||
if (line.empty())
|
||||
return {};
|
||||
|
||||
auto end = std::end(line);
|
||||
if (line.back() == '\n')
|
||||
--end;
|
||||
|
||||
return std::string(std::begin(line), end);
|
||||
}
|
||||
|
||||
// Opens |filename| for reading. A failure to open is only logged; the object
// is still constructed. Afterwards the stream is configured to throw on
// badbit.
CSVReader::DefaultReader::DefaultReader(std::string const & filename)
  : m_stream(filename)
{
  bool const opened = !m_stream.fail();
  if (!opened)
    LOG(LERROR, ("Can't open file ", filename));

  m_stream.exceptions(std::ios::badbit);
}
|
||||
|
||||
std::optional<std::string> CSVReader::DefaultReader::ReadLine()
|
||||
{
|
||||
return IstreamWrapper(m_stream).ReadLine();
|
||||
}
|
||||
|
||||
// Binds the iterator to |reader|. Unless |isEnd| is set, one row is read
// immediately and becomes the current value; an end iterator holds no row.
CSVRunner::Iterator::Iterator(CSVReader & reader, bool isEnd)
  : m_reader(reader)
{
  if (isEnd)
    return;

  m_current = m_reader.ReadRow();
}

// Copies the reader binding and the current row (if any). No row is read.
CSVRunner::Iterator::Iterator(Iterator const & other)
  : m_reader(other.m_reader)
  , m_current(other.m_current)
{
}
|
||||
|
||||
// Pre-increment: replaces the current row with the next one read from the
// reader (or with no row if the reader is exhausted).
CSVRunner::Iterator & CSVRunner::Iterator::operator++()
{
  m_current = m_reader.ReadRow();
  return *this;
}

// Post-increment: advances this iterator and returns a copy of its state from
// before the advance.
CSVRunner::Iterator CSVRunner::Iterator::operator++(int)
{
  Iterator previous(*this);
  ++(*this);
  return previous;
}
|
||||
|
||||
// Two iterators compare equal when they refer to the same CSVReader object and
// either both hold a row or both hold none. The row contents are not compared.
bool CSVRunner::Iterator::operator==(Iterator const & other) const
{
  if (&m_reader != &other.m_reader)
    return false;

  return m_current.has_value() == other.m_current.has_value();
}
|
||||
|
||||
// Negation of operator==.
bool CSVRunner::Iterator::operator!=(Iterator const & other) const
{
  return !operator==(other);
}
|
||||
|
||||
// Returns the row currently held by the iterator. The optional is dereferenced
// without a check, so this must not be called on an iterator that holds no row
// (an end iterator, or one advanced past the last row).
CSVReader::Row & CSVRunner::Iterator::operator*() { return *m_current; }
|
||||
|
||||
// Takes over |reader| by moving it into the runner.
CSVRunner::CSVRunner(CSVReader && reader)
  : m_reader(std::move(reader))
{
}
|
||||
|
||||
// Creates a non-end iterator over the owned reader. Constructing it reads one
// row from the reader.
CSVRunner::Iterator CSVRunner::begin()
{
  return Iterator(m_reader, false /* isEnd */);
}
|
||||
|
||||
// Creates an end iterator over the owned reader. Nothing is read.
CSVRunner::Iterator CSVRunner::end()
{
  bool const isEnd = true;
  return Iterator(m_reader, isEnd);
}
|
||||
} // namespace coding
|
||||
128
libs/coding/csv_reader.hpp
Normal file
128
libs/coding/csv_reader.hpp
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include <cstddef>
#include <fstream>
#include <functional>
#include <iterator>
#include <memory>
#include <optional>
#include <string>
#include <vector>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
class CSVReader
|
||||
{
|
||||
public:
|
||||
using Row = std::vector<std::string>;
|
||||
using Rows = std::vector<Row>;
|
||||
|
||||
explicit CSVReader(std::string const & filename, bool hasHeader = false, char delimiter = ',');
|
||||
explicit CSVReader(std::istream & stream, bool hasHeader = false, char delimiter = ',');
|
||||
explicit CSVReader(Reader const & reader, bool hasHeader = false, char delimiter = ',');
|
||||
|
||||
bool HasHeader() const;
|
||||
char GetDelimiter() const;
|
||||
|
||||
Row const & GetHeader() const;
|
||||
std::optional<Row> ReadRow();
|
||||
Rows ReadAll();
|
||||
|
||||
template <typename Fn>
|
||||
void ForEachRow(Fn && fn)
|
||||
{
|
||||
while (auto const optRow = ReadRow())
|
||||
fn(*optRow);
|
||||
}
|
||||
|
||||
// The total number of lines read including the header. Count starts at 0.
|
||||
size_t GetCurrentLineNumber() const;
|
||||
|
||||
private:
|
||||
class ReaderInterface
|
||||
{
|
||||
public:
|
||||
virtual ~ReaderInterface() = default;
|
||||
|
||||
virtual std::optional<std::string> ReadLine() = 0;
|
||||
};
|
||||
|
||||
class IstreamWrapper : public ReaderInterface
|
||||
{
|
||||
public:
|
||||
explicit IstreamWrapper(std::istream & stream);
|
||||
|
||||
// ReaderInterface overrides:
|
||||
std::optional<std::string> ReadLine() override;
|
||||
|
||||
private:
|
||||
std::istream & m_stream;
|
||||
};
|
||||
|
||||
class ReaderWrapper : public ReaderInterface
|
||||
{
|
||||
public:
|
||||
explicit ReaderWrapper(Reader const & reader);
|
||||
|
||||
// ReaderInterface overrides:
|
||||
std::optional<std::string> ReadLine() override;
|
||||
|
||||
private:
|
||||
size_t m_pos = 0;
|
||||
Reader const & m_reader;
|
||||
};
|
||||
|
||||
class DefaultReader : public ReaderInterface
|
||||
{
|
||||
public:
|
||||
explicit DefaultReader(std::string const & filename);
|
||||
|
||||
// ReaderInterface overrides:
|
||||
std::optional<std::string> ReadLine() override;
|
||||
|
||||
private:
|
||||
std::ifstream m_stream;
|
||||
};
|
||||
|
||||
explicit CSVReader(std::unique_ptr<ReaderInterface> reader, bool hasHeader, char delimiter);
|
||||
|
||||
std::unique_ptr<ReaderInterface> m_reader;
|
||||
size_t m_currentLine = 0;
|
||||
bool m_hasHeader = false;
|
||||
char m_delimiter = ',';
|
||||
Row m_header;
|
||||
};
|
||||
|
||||
class CSVRunner
|
||||
{
|
||||
public:
|
||||
explicit CSVRunner(CSVReader && reader);
|
||||
|
||||
class Iterator
|
||||
{
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = CSVReader::Row;
|
||||
|
||||
explicit Iterator(CSVReader & reader, bool isEnd = false);
|
||||
Iterator(Iterator const & other);
|
||||
Iterator & operator++();
|
||||
Iterator operator++(int);
|
||||
// Checks whether both this and other are equal. Two CSVReader iterators are equal if both of
|
||||
// them are end-of-file iterators or not and both of them refer to the same CSVReader.
|
||||
bool operator==(Iterator const & other) const;
|
||||
bool operator!=(Iterator const & other) const;
|
||||
CSVReader::Row & operator*();
|
||||
|
||||
private:
|
||||
CSVReader & m_reader;
|
||||
std::optional<CSVReader::Row> m_current;
|
||||
};
|
||||
|
||||
// Warning: It reads first line.
|
||||
Iterator begin();
|
||||
Iterator end();
|
||||
|
||||
private:
|
||||
CSVReader m_reader;
|
||||
};
|
||||
} // namespace coding
|
||||
163
libs/coding/dd_vector.hpp
Normal file
163
libs/coding/dd_vector.hpp
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/exception.hpp"
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include <boost/iterator/iterator_facade.hpp>
|
||||
|
||||
// Disk-driven vector.
|
||||
template <typename T, class TReader, typename TSize = uint32_t>
|
||||
class DDVector
|
||||
{
|
||||
public:
|
||||
typedef T value_type;
|
||||
typedef TSize size_type;
|
||||
typedef std::make_signed_t<size_type> difference_type;
|
||||
typedef TReader ReaderType;
|
||||
|
||||
DECLARE_EXCEPTION(OpenException, RootException);
|
||||
|
||||
DDVector() : m_Size(0) {}
|
||||
|
||||
explicit DDVector(TReader const & reader) : m_reader(reader) { InitSize(); }
|
||||
|
||||
void Init(TReader const & reader)
|
||||
{
|
||||
m_reader = reader;
|
||||
InitSize();
|
||||
}
|
||||
|
||||
size_type size() const { return m_Size; }
|
||||
|
||||
T const operator[](size_type i) const
|
||||
{
|
||||
return ReadPrimitiveFromPos<T>(m_reader, static_cast<uint64_t>(i) * sizeof(T));
|
||||
}
|
||||
|
||||
class const_iterator
|
||||
: public boost::iterator_facade<const_iterator, value_type const, boost::random_access_traversal_tag,
|
||||
value_type const &, difference_type>
|
||||
{
|
||||
public:
|
||||
#ifdef DEBUG
|
||||
const_iterator(ReaderType const * pReader, size_type i, size_type size)
|
||||
: m_pReader(pReader)
|
||||
, m_I(i)
|
||||
, m_bValueRead(false)
|
||||
, m_Size(size)
|
||||
{
|
||||
ASSERT(static_cast<difference_type>(m_Size) >= 0, ());
|
||||
}
|
||||
#else
|
||||
const_iterator(ReaderType const * pReader, size_type i) : m_pReader(pReader), m_I(i), m_bValueRead(false) {}
|
||||
#endif
|
||||
|
||||
T const & dereference() const
|
||||
{
|
||||
ASSERT_LESS(m_I, m_Size, (m_bValueRead));
|
||||
if (!m_bValueRead)
|
||||
{
|
||||
m_Value = ReadPrimitiveFromPos<T>(*m_pReader, static_cast<uint64_t>(m_I) * sizeof(T));
|
||||
m_bValueRead = true;
|
||||
}
|
||||
return m_Value;
|
||||
}
|
||||
|
||||
void advance(difference_type n)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
|
||||
m_I += n;
|
||||
ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
|
||||
m_bValueRead = false;
|
||||
}
|
||||
|
||||
difference_type distance_to(const_iterator const & it) const
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
|
||||
ASSERT_LESS_OR_EQUAL(it.m_I, it.m_Size, (it.m_bValueRead));
|
||||
ASSERT_EQUAL(m_Size, it.m_Size, (m_I, it.m_I, m_bValueRead, it.m_bValueRead));
|
||||
ASSERT(m_pReader == it.m_pReader, (m_I, m_Size, it.m_I, it.m_Size));
|
||||
return (static_cast<difference_type>(it.m_I) - static_cast<difference_type>(m_I));
|
||||
}
|
||||
|
||||
void increment()
|
||||
{
|
||||
++m_I;
|
||||
m_bValueRead = false;
|
||||
ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
|
||||
}
|
||||
|
||||
void decrement()
|
||||
{
|
||||
--m_I;
|
||||
m_bValueRead = false;
|
||||
ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
|
||||
}
|
||||
|
||||
bool equal(const_iterator const & it) const
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
|
||||
ASSERT_LESS_OR_EQUAL(it.m_I, it.m_Size, (it.m_bValueRead));
|
||||
ASSERT_EQUAL(m_Size, it.m_Size, (m_I, it.m_I, m_bValueRead, it.m_bValueRead));
|
||||
ASSERT(m_pReader == it.m_pReader, (m_I, m_Size, it.m_I, it.m_Size));
|
||||
return m_I == it.m_I;
|
||||
}
|
||||
|
||||
private:
|
||||
ReaderType const * m_pReader;
|
||||
size_type m_I;
|
||||
mutable T m_Value = {};
|
||||
mutable bool m_bValueRead;
|
||||
#ifdef DEBUG
|
||||
size_type m_Size;
|
||||
#endif
|
||||
};
|
||||
|
||||
const_iterator begin() const
|
||||
{
|
||||
#ifdef DEBUG
|
||||
return const_iterator(&m_reader, 0, m_Size);
|
||||
#else
|
||||
return const_iterator(&m_reader, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
const_iterator end() const
|
||||
{
|
||||
#ifdef DEBUG
|
||||
return const_iterator(&m_reader, m_Size, m_Size);
|
||||
#else
|
||||
return const_iterator(&m_reader, m_Size);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Read(size_type i, T & result) const
|
||||
{
|
||||
ASSERT_LESS(i, m_Size, ());
|
||||
ReadFromPos(m_reader, i * sizeof(T), &result, sizeof(T));
|
||||
}
|
||||
|
||||
void Read(size_type i, T * result, size_t count)
|
||||
{
|
||||
ASSERT_LESS(i + count, m_Size, (i, count));
|
||||
ReadFromPos(m_reader, i * sizeof(T), result, count * sizeof(T));
|
||||
}
|
||||
|
||||
private:
|
||||
void InitSize()
|
||||
{
|
||||
uint64_t const sz = m_reader.Size();
|
||||
if ((sz % sizeof(T)) != 0)
|
||||
MYTHROW(OpenException, ("Element size", sizeof(T), "does not divide total size", sz));
|
||||
|
||||
m_Size = static_cast<size_type>(sz / sizeof(T));
|
||||
}
|
||||
|
||||
// TODO: Refactor me to use Reader by pointer.
|
||||
ReaderType m_reader;
|
||||
size_type m_Size;
|
||||
};
|
||||
210
libs/coding/diff.hpp
Normal file
210
libs/coding/diff.hpp
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
#pragma once
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/base.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace diff
|
||||
{
|
||||
// Kinds of operations a patch consists of. The numeric values matter:
// PatchCoder::Op() evaluates (m_LastOperation + 1) % 3 on them, so they have
// to stay 0, 1 and 2.
enum Operation
{
  OPERATION_COPY = 0,    // Passed to PatchCoder::Op() by Copy().
  OPERATION_DELETE = 1,  // Passed to PatchCoder::Op() by Delete().
  OPERATION_INSERT = 2,  // Passed to PatchCoder::Op() by Insert().
};
|
||||
|
||||
template <class PatchWriterT, typename SizeT = uint64_t>
|
||||
class PatchCoder
|
||||
{
|
||||
public:
|
||||
typedef SizeT size_type;
|
||||
|
||||
explicit PatchCoder(PatchWriterT & patchWriter)
|
||||
: m_LastOperation(OPERATION_COPY)
|
||||
, m_LastOpCode(0)
|
||||
, m_PatchWriter(patchWriter)
|
||||
{}
|
||||
|
||||
void Delete(size_type n)
|
||||
{
|
||||
if (n != 0)
|
||||
Op(OPERATION_DELETE, n);
|
||||
}
|
||||
|
||||
void Copy(size_type n)
|
||||
{
|
||||
if (n != 0)
|
||||
Op(OPERATION_COPY, n);
|
||||
}
|
||||
|
||||
template <typename TIter>
|
||||
void Insert(TIter it, size_type n)
|
||||
{
|
||||
if (n != 0)
|
||||
{
|
||||
Op(OPERATION_INSERT, n);
|
||||
m_PatchWriter.WriteData(it, n);
|
||||
}
|
||||
}
|
||||
|
||||
void Finalize() { WriteLasOp(); }
|
||||
|
||||
private:
|
||||
void Op(Operation op, size_type n)
|
||||
{
|
||||
if (m_LastOperation == op)
|
||||
{
|
||||
m_LastOpCode += (n << 1);
|
||||
return;
|
||||
}
|
||||
WriteLasOp();
|
||||
m_LastOpCode = (n << 1) | ((m_LastOperation + 1) % 3 == op ? 0 : 1);
|
||||
m_LastOperation = op;
|
||||
}
|
||||
|
||||
void WriteLasOp()
|
||||
{
|
||||
if (m_LastOpCode != 0)
|
||||
m_PatchWriter.WriteOperation(m_LastOpCode);
|
||||
else
|
||||
CHECK_EQUAL(m_LastOperation, OPERATION_COPY, ()); // "We were just initialized."
|
||||
}
|
||||
|
||||
Operation m_LastOperation;
|
||||
size_type m_LastOpCode;
|
||||
PatchWriterT & m_PatchWriter;
|
||||
};
|
||||
|
||||
// Find minimal patch, with no more than maxPatchSize edited values, that transforms A into B.
|
||||
// Returns the length of the minimal patch, or -1 if no such patch found.
|
||||
// Intermediate information is saved into tmpSink and can be used later to restore
|
||||
// the resulting patch.
|
||||
template <typename TSignedWord, // Signed word, capable of storing position in text.
|
||||
class TSrcVector, // Source data (A).
|
||||
class TDstVector, // Destination data (B).
|
||||
class TTmpFileSink // Sink to store temporary information.
|
||||
>
|
||||
TSignedWord DiffMyersSimple(TSrcVector const & A, TDstVector const & B, TSignedWord maxPatchSize,
|
||||
TTmpFileSink & tmpSink)
|
||||
{
|
||||
ASSERT_GREATER(maxPatchSize, 0, ());
|
||||
std::vector<TSignedWord> V(2 * maxPatchSize + 1);
|
||||
for (TSignedWord d = 0; d <= maxPatchSize; ++d)
|
||||
{
|
||||
for (TSignedWord k = -d; k <= d; k += 2)
|
||||
{
|
||||
TSignedWord x;
|
||||
if (k == -d || (k != d && V[maxPatchSize + k - 1] < V[maxPatchSize + k + 1]))
|
||||
x = V[maxPatchSize + k + 1];
|
||||
else
|
||||
x = V[maxPatchSize + k - 1] + 1;
|
||||
while (x < static_cast<TSignedWord>(A.size()) && x - k < static_cast<TSignedWord>(B.size()) && A[x] == B[x - k])
|
||||
++x;
|
||||
V[maxPatchSize + k] = x;
|
||||
if (x == static_cast<TSignedWord>(A.size()) && x - k == static_cast<TSignedWord>(B.size()))
|
||||
return d;
|
||||
}
|
||||
tmpSink.Write(&V[maxPatchSize - d], (2 * d + 1) * sizeof(TSignedWord));
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Differ that just replaces old with new, with the only optimization of skipping equal values
|
||||
// at the beginning and at the end.
|
||||
class SimpleReplaceDiffer
{
public:
  // Emits, in this order: Copy(common prefix length), Delete(length of the
  // remaining source), Insert(remaining destination), Copy(common suffix
  // length). The suffix is searched only in what is left after the prefix has
  // been removed, so prefix and suffix never overlap. Every call is made even
  // when its count is zero.
  template <typename SrcIterT, typename DstIterT, class PatchCoderT>
  void Diff(SrcIterT srcBeg, SrcIterT srcEnd, DstIterT dstBeg, DstIterT dstEnd, PatchCoderT & patchCoder)
  {
    typedef typename PatchCoderT::size_type Count;

    Count commonPrefix = 0;
    while (srcBeg != srcEnd && dstBeg != dstEnd && *srcBeg == *dstBeg)
    {
      ++srcBeg;
      ++dstBeg;
      ++commonPrefix;
    }
    patchCoder.Copy(commonPrefix);

    Count commonSuffix = 0;
    while (srcBeg != srcEnd && dstBeg != dstEnd && *(srcEnd - 1) == *(dstEnd - 1))
    {
      --srcEnd;
      --dstEnd;
      ++commonSuffix;
    }

    patchCoder.Delete(srcEnd - srcBeg);
    patchCoder.Insert(dstBeg, dstEnd - dstBeg);
    patchCoder.Copy(commonSuffix);
  }
};
|
||||
|
||||
// Given FineGrainedDiff and rolling Hasher, DiffWithRollingHash splits the source sequence
|
||||
// into chunks of size m_BlockSize, finds equal chunks in the destination sequence, using rolling
|
||||
// hash to find good candidates, writes info about equal chunks into patchCoder and for everything
|
||||
// between equal chunks, calls FineGrainedDiff::Diff().
|
||||
// Block-matching differ. See Diff() for the algorithm.
//
// Requirements visible from the code:
//  - HasherT: default constructible, has hash_type, Init(iterator, size) and
//    Scroll(removedValue, addedValue), both returning the current hash.
//  - FineGrainedDiffT: Diff(srcBeg, srcEnd, dstBeg, dstEnd, patchCoder).
//  - HashPosMultiMapT: multimap from hash to the source offset of a block.
template <class FineGrainedDiffT, class HasherT,
          class HashPosMultiMapT = std::unordered_multimap<typename HasherT::hash_type, uint64_t>>
class RollingHashDiffer
{
public:
  explicit RollingHashDiffer(size_t blockSize, FineGrainedDiffT const & fineGrainedDiff = FineGrainedDiffT())
    : m_FineGrainedDiff(fineGrainedDiff)
    , m_BlockSize(blockSize)
  {}

  // Hashes every aligned block of m_BlockSize source elements, then slides a
  // window of the same size over the destination. When the window equals a
  // source block located at or after the end of the previous match, the gap
  // before it is passed to the fine-grained differ and the block itself is
  // reported with patchCoder.Copy(). Whatever is left at the end goes to the
  // fine-grained differ as well.
  //
  // If either range is shorter than one block, or the block size is 0, the
  // whole input is passed to the fine-grained differ.
  template <typename SrcIterT, typename DstIterT, class PatchCoderT>
  void Diff(SrcIterT const srcBeg, SrcIterT const srcEnd, DstIterT const dstBeg, DstIterT const dstEnd,
            PatchCoderT & patchCoder)
  {
    // A zero block size would never advance the source hashing loop below.
    if (m_BlockSize == 0 || srcEnd - srcBeg < static_cast<decltype(srcEnd - srcBeg)>(m_BlockSize) ||
        dstEnd - dstBeg < static_cast<decltype(dstEnd - dstBeg)>(m_BlockSize))
    {
      m_FineGrainedDiff.Diff(srcBeg, srcEnd, dstBeg, dstEnd, patchCoder);
      return;
    }

    HasherT hasher;

    // Hash -> offset for every complete, block-aligned source block.
    HashPosMultiMapT srcHashes;
    for (SrcIterT src = srcBeg; srcEnd - src >= static_cast<decltype(srcEnd - src)>(m_BlockSize); src += m_BlockSize)
      srcHashes.insert(HashPosMultiMapValue(hasher.Init(src, m_BlockSize), src - srcBeg));

    // [dst, dstNext) is the current window; *LastDiff mark where the part not
    // yet covered by the patch starts.
    SrcIterT srcLastDiff = srcBeg;
    DstIterT dst = dstBeg, dstNext = dstBeg + m_BlockSize, dstLastDiff = dstBeg;
    hash_type h = hasher.Init(dst, m_BlockSize);
    while (dstNext != dstEnd)
    {
      std::pair<HashPosMultiMapIterator, HashPosMultiMapIterator> iters = srcHashes.equal_range(h);
      if (iters.first != iters.second)
      {
        // Among the blocks with this hash take the one with the smallest
        // offset that is not before the end of the previous match.
        pos_type const srcLastDiffPos = srcLastDiff - srcBeg;
        HashPosMultiMapIterator it = srcHashes.end();
        for (HashPosMultiMapIterator i = iters.first; i != iters.second; ++i)
          if (i->second >= srcLastDiffPos && (it == srcHashes.end() || i->second < it->second))
            it = i;

        // Equal hashes are only a hint; compare the actual content.
        if (it != srcHashes.end() && std::equal(srcBeg + it->second, srcBeg + it->second + m_BlockSize, dst))
        {
          pos_type srcBlockEqualPos = it->second;
          m_FineGrainedDiff.Diff(srcLastDiff, srcBeg + srcBlockEqualPos, dstLastDiff, dst, patchCoder);
          patchCoder.Copy(m_BlockSize);
          srcLastDiff = srcBeg + srcBlockEqualPos + m_BlockSize;
          dst = dstLastDiff = dstNext;
          // Less than one block left in the destination: stop matching.
          if (dstEnd - dstNext < static_cast<decltype(dstEnd - dstNext)>(m_BlockSize))
            break;
          dstNext = dst + m_BlockSize;
          h = hasher.Init(dst, m_BlockSize);
          continue;
        }
      }

      // No match at this position: move the window one element forward.
      h = hasher.Scroll(*(dst++), *(dstNext++));
    }

    if (srcLastDiff != srcEnd || dstLastDiff != dstEnd)
      m_FineGrainedDiff.Diff(srcLastDiff, srcEnd, dstLastDiff, dstEnd, patchCoder);
  }

private:
  typedef typename HasherT::hash_type hash_type;
  typedef typename HashPosMultiMapT::value_type::second_type pos_type;
  typedef typename HashPosMultiMapT::const_iterator HashPosMultiMapIterator;
  typedef typename HashPosMultiMapT::value_type HashPosMultiMapValue;

  FineGrainedDiffT m_FineGrainedDiff;
  size_t m_BlockSize;
};
|
||||
} // namespace diff
|
||||
76
libs/coding/elias_coder.hpp
Normal file
76
libs/coding/elias_coder.hpp
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/bit_streams.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/bits.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
class GammaCoder
|
||||
{
|
||||
public:
|
||||
template <typename TWriter>
|
||||
static bool Encode(BitWriter<TWriter> & writer, uint64_t value)
|
||||
{
|
||||
if (value == 0)
|
||||
return false;
|
||||
|
||||
uint8_t const n = bits::FloorLog(value);
|
||||
ASSERT_LESS_OR_EQUAL(n, 63, ());
|
||||
|
||||
uint64_t const msb = static_cast<uint64_t>(1) << n;
|
||||
writer.WriteAtMost64Bits(msb, n + 1);
|
||||
writer.WriteAtMost64Bits(value, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename TReader>
|
||||
static uint64_t Decode(BitReader<TReader> & reader)
|
||||
{
|
||||
uint8_t n = 0;
|
||||
while (reader.Read(1) == 0)
|
||||
++n;
|
||||
|
||||
ASSERT_LESS_OR_EQUAL(n, 63, ());
|
||||
|
||||
uint64_t const msb = static_cast<uint64_t>(1) << n;
|
||||
return msb | reader.ReadAtMost64Bits(n);
|
||||
}
|
||||
};
|
||||
|
||||
class DeltaCoder
|
||||
{
|
||||
public:
|
||||
template <typename TWriter>
|
||||
static bool Encode(BitWriter<TWriter> & writer, uint64_t value)
|
||||
{
|
||||
if (value == 0)
|
||||
return false;
|
||||
|
||||
uint8_t const n = bits::FloorLog(value);
|
||||
ASSERT_LESS_OR_EQUAL(n, 63, ());
|
||||
if (!GammaCoder::Encode(writer, n + 1))
|
||||
return false;
|
||||
|
||||
writer.WriteAtMost64Bits(value, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename TReader>
|
||||
static uint64_t Decode(BitReader<TReader> & reader)
|
||||
{
|
||||
uint8_t n = GammaCoder::Decode(reader);
|
||||
|
||||
ASSERT_GREATER(n, 0, ());
|
||||
--n;
|
||||
|
||||
ASSERT_LESS_OR_EQUAL(n, 63, ());
|
||||
|
||||
uint64_t const msb = static_cast<uint64_t>(1) << n;
|
||||
return msb | reader.ReadAtMost64Bits(n);
|
||||
}
|
||||
};
|
||||
} // namespace coding
|
||||
54
libs/coding/endianness.hpp
Normal file
54
libs/coding/endianness.hpp
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
#pragma once
|
||||
|
||||
#include "base/base.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
// #define ENDIAN_IS_BIG
|
||||
|
||||
// @TODO(bykoianko) This method returns false since 05.12.2010. That means only little-endian
|
||||
// architectures are supported. When it's necessary to support a big-endian system:
|
||||
// * method IsBigEndianMacroBased() should be implemented based on IsLittleEndian() function
|
||||
// * method SwapIfBigEndianMacroBased() should be implemented based on IsLittleEndian() function
|
||||
// * all serialization and deserialization of rs_bit_vector and the other rank-select structures
|
||||
// should be implemented taking endianness into account
|
||||
|
||||
// Compile-time answer: true if and only if ENDIAN_IS_BIG is defined. The
// actual byte order of the machine is not inspected.
inline bool IsBigEndianMacroBased()
{
#if defined(ENDIAN_IS_BIG)
  bool const isBigEndian = true;
#else
  bool const isBigEndian = false;
#endif
  return isBigEndian;
}
|
||||
|
||||
// Returns |t| with the order of its bytes reversed. Only integral types are
// accepted.
template <typename T>
T ReverseByteOrder(T t)
{
  static_assert(std::is_integral<T>::value, "Only integral types are supported.");

  T reversed;
  // Walk the source from its last byte backwards while filling the result
  // from its first byte forwards.
  char const * src = reinterpret_cast<char const *>(&t) + sizeof(T);
  char * dst = reinterpret_cast<char *>(&reversed);
  char * const dstEnd = dst + sizeof(T);
  while (dst != dstEnd)
    *dst++ = *--src;
  return reversed;
}
|
||||
|
||||
// Returns |t| with its bytes reversed when ENDIAN_IS_BIG is defined, and |t|
// unchanged otherwise.
template <typename T>
T SwapIfBigEndianMacroBased(T t)
{
#if defined(ENDIAN_IS_BIG)
  t = ReverseByteOrder(t);
#endif
  return t;
}
|
||||
|
||||
// Run-time check: stores the 16-bit value 1 and reports whether its first
// byte in memory is the non-zero one.
inline bool IsLittleEndian()
{
  uint16_t const probe = 0x0001;
  uint8_t const firstByte = *reinterpret_cast<uint8_t const *>(&probe);
  return firstByte != 0x0;
}
|
||||
134
libs/coding/file_reader.cpp
Normal file
134
libs/coding/file_reader.cpp
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
#include "coding/file_reader.hpp"
|
||||
|
||||
#include "coding/internal/file_data.hpp"
|
||||
#include "coding/reader_cache.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#ifndef LOG_FILE_READER_STATS
|
||||
#define LOG_FILE_READER_STATS 0
|
||||
#endif // LOG_FILE_READER_STATS
|
||||
|
||||
#if LOG_FILE_READER_STATS && !defined(LOG_FILE_READER_EVERY_N_READS_MASK)
|
||||
#define LOG_FILE_READER_EVERY_N_READS_MASK 0xFFFFFFFF
|
||||
#endif
|
||||
|
||||
// static
|
||||
uint32_t const FileReader::kDefaultLogPageSize = 10; // page size is 2^10 = 1024 = 1kb
|
||||
// static
|
||||
uint32_t const FileReader::kDefaultLogPageCount = 4; // page count is 2^4 = 16, i.e. 16 pages are cached
|
||||
|
||||
class FileReader::FileReaderData
|
||||
{
|
||||
public:
|
||||
FileReaderData(std::string const & fileName, uint32_t logPageSize, uint32_t logPageCount)
|
||||
: m_fileData(fileName)
|
||||
, m_readerCache(logPageSize, logPageCount)
|
||||
{
|
||||
#if LOG_FILE_READER_STATS
|
||||
m_readCallCount = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
~FileReaderData()
|
||||
{
|
||||
#if LOG_FILE_READER_STATS
|
||||
LOG(LINFO, ("FileReader", m_fileData.GetName(), m_readerCache.GetStatsStr()));
|
||||
#endif
|
||||
}
|
||||
|
||||
uint64_t Size() const { return m_fileData.Size(); }
|
||||
|
||||
void Read(uint64_t pos, void * p, size_t size)
|
||||
{
|
||||
#if LOG_FILE_READER_STATS
|
||||
if (((++m_readCallCount) & LOG_FILE_READER_EVERY_N_READS_MASK) == 0)
|
||||
LOG(LINFO, ("FileReader", m_fileData.GetName(), m_readerCache.GetStatsStr()));
|
||||
#endif
|
||||
|
||||
return m_readerCache.Read(m_fileData, pos, p, size);
|
||||
}
|
||||
|
||||
private:
|
||||
class FileDataWithCachedSize : public base::FileData
|
||||
{
|
||||
public:
|
||||
explicit FileDataWithCachedSize(std::string const & fileName)
|
||||
: base::FileData(fileName, Op::READ)
|
||||
, m_Size(FileData::Size())
|
||||
{}
|
||||
|
||||
uint64_t Size() const { return m_Size; }
|
||||
|
||||
private:
|
||||
uint64_t m_Size;
|
||||
};
|
||||
|
||||
FileDataWithCachedSize m_fileData;
|
||||
ReaderCache<FileDataWithCachedSize, LOG_FILE_READER_STATS> m_readerCache;
|
||||
|
||||
#if LOG_FILE_READER_STATS
|
||||
uint32_t m_readCallCount;
|
||||
#endif
|
||||
};
|
||||
|
||||
// Opens |fileName| with the default page size and page count.
FileReader::FileReader(std::string const & fileName)
  : FileReader(fileName, kDefaultLogPageSize, kDefaultLogPageCount)
{
}

// Opens |fileName| and creates the shared file data. The reader covers the
// whole file: offset 0, size equal to the file size.
FileReader::FileReader(std::string const & fileName, uint32_t logPageSize, uint32_t logPageCount)
  : ModelReader(fileName)
  , m_logPageSize(logPageSize)
  , m_logPageCount(logPageCount)
  , m_fileData(std::make_shared<FileReaderData>(fileName, logPageSize, logPageCount))
  , m_offset(0)
  , m_size(m_fileData->Size())
{
}

// Creates a reader over the window [offset, offset + size) of the file that
// |reader| refers to. The file data is shared with |reader|, not reopened.
// |offset| is used as is, i.e. it is an absolute position in the file.
FileReader::FileReader(FileReader const & reader, uint64_t offset, uint64_t size, uint32_t logPageSize,
                       uint32_t logPageCount)
  : ModelReader(reader.GetName())
  , m_logPageSize(logPageSize)
  , m_logPageCount(logPageCount)
  , m_fileData(reader.m_fileData)
  , m_offset(offset)
  , m_size(size)
{
}
|
||||
|
||||
void FileReader::Read(uint64_t pos, void * p, size_t size) const
|
||||
{
|
||||
CheckPosAndSize(pos, size);
|
||||
m_fileData->Read(m_offset + pos, p, size);
|
||||
}
|
||||
|
||||
// Returns a reader over [pos, pos + size) of this reader's window. It shares
// the file data and cache parameters with this reader.
// Throws Reader::SizeException if the range is out of bounds.
FileReader FileReader::SubReader(uint64_t pos, uint64_t size) const
{
  CheckPosAndSize(pos, size);

  uint64_t const fileOffset = m_offset + pos;
  return FileReader(*this, fileOffset, size, m_logPageSize, m_logPageCount);
}
|
||||
|
||||
std::unique_ptr<Reader> FileReader::CreateSubReader(uint64_t pos, uint64_t size) const
|
||||
{
|
||||
CheckPosAndSize(pos, size);
|
||||
// Can't use make_unique with private constructor.
|
||||
return std::unique_ptr<Reader>(new FileReader(*this, m_offset + pos, size, m_logPageSize, m_logPageCount));
|
||||
}
|
||||
|
||||
void FileReader::CheckPosAndSize(uint64_t pos, uint64_t size) const
|
||||
{
|
||||
uint64_t const allSize1 = Size();
|
||||
bool const ret1 = (pos + size <= allSize1);
|
||||
if (!ret1)
|
||||
MYTHROW(Reader::SizeException, (pos, size, allSize1));
|
||||
|
||||
uint64_t const allSize2 = m_fileData->Size();
|
||||
bool const ret2 = (m_offset + pos + size <= allSize2);
|
||||
if (!ret2)
|
||||
MYTHROW(Reader::SizeException, (pos, size, allSize2));
|
||||
}
|
||||
|
||||
void FileReader::SetOffsetAndSize(uint64_t offset, uint64_t size)
|
||||
{
|
||||
CheckPosAndSize(offset, size);
|
||||
m_offset = offset;
|
||||
m_size = size;
|
||||
}
|
||||
49
libs/coding/file_reader.hpp
Normal file
49
libs/coding/file_reader.hpp
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include "base/base.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
// FileReader, cheap to copy, not thread safe.
|
||||
// It is assumed that the file is not modified during the FileReader lifetime,
|
||||
// because of caching and assumption that Size() is constant.
|
||||
class FileReader : public ModelReader
|
||||
{
|
||||
public:
|
||||
static uint32_t const kDefaultLogPageSize;
|
||||
static uint32_t const kDefaultLogPageCount;
|
||||
|
||||
explicit FileReader(std::string const & fileName);
|
||||
FileReader(std::string const & fileName, uint32_t logPageSize, uint32_t logPageCount);
|
||||
|
||||
// Reader overrides:
|
||||
uint64_t Size() const override { return m_size; }
|
||||
void Read(uint64_t pos, void * p, size_t size) const override;
|
||||
std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const override;
|
||||
|
||||
FileReader SubReader(uint64_t pos, uint64_t size) const;
|
||||
uint64_t GetOffset() const { return m_offset; }
|
||||
|
||||
protected:
|
||||
// Used in special derived readers.
|
||||
void SetOffsetAndSize(uint64_t offset, uint64_t size);
|
||||
|
||||
private:
|
||||
class FileReaderData;
|
||||
|
||||
FileReader(FileReader const & reader, uint64_t offset, uint64_t size, uint32_t logPageSize, uint32_t logPageCount);
|
||||
|
||||
// Throws an exception if a (pos, size) read would result in an out-of-bounds access.
|
||||
void CheckPosAndSize(uint64_t pos, uint64_t size) const;
|
||||
|
||||
uint32_t m_logPageSize;
|
||||
uint32_t m_logPageCount;
|
||||
std::shared_ptr<FileReaderData> m_fileData;
|
||||
uint64_t m_offset;
|
||||
uint64_t m_size;
|
||||
};
|
||||
142
libs/coding/file_sort.hpp
Normal file
142
libs/coding/file_sort.hpp
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/file_reader.hpp"
|
||||
#include "coding/file_writer.hpp"
|
||||
|
||||
#include "base/base.hpp"
|
||||
#include "base/exception.hpp"
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <exception>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// Function object that sorts a range with std::sort using the stored
// comparator.
template <typename LessT>
struct Sorter
{
  // Comparator passed to std::sort.
  LessT m_Less;

  Sorter(LessT lessF) : m_Less(lessF) {}

  // Sorts [first, last) in place.
  template <typename RandomIt>
  void operator()(RandomIt first, RandomIt last) const
  {
    std::sort(first, last, m_Less);
  }
};
|
||||
|
||||
template <typename T, // Item type.
|
||||
class OutputSinkT = FileWriter, // Sink to output into result file.
|
||||
typename LessT = std::less<T>, // Item comparator.
|
||||
template <typename LessT1> class SorterT = Sorter // Item sorter.
|
||||
>
|
||||
class FileSorter
|
||||
{
|
||||
public:
|
||||
FileSorter(size_t bufferBytes, std::string const & tmpFileName, OutputSinkT & outputSink, LessT fLess = LessT())
|
||||
: m_TmpFileName(tmpFileName)
|
||||
, m_BufferCapacity(std::max(size_t(16), bufferBytes / sizeof(T)))
|
||||
, m_OutputSink(outputSink)
|
||||
, m_ItemCount(0)
|
||||
, m_Less(fLess)
|
||||
{
|
||||
m_Buffer.reserve(m_BufferCapacity);
|
||||
m_pTmpWriter.reset(new FileWriter(tmpFileName));
|
||||
}
|
||||
|
||||
void Add(T const & item)
|
||||
{
|
||||
if (m_Buffer.size() == m_BufferCapacity)
|
||||
FlushToTmpFile();
|
||||
m_Buffer.push_back(item);
|
||||
++m_ItemCount;
|
||||
}
|
||||
|
||||
void SortAndFinish()
|
||||
{
|
||||
ASSERT(m_pTmpWriter.get(), ());
|
||||
FlushToTmpFile();
|
||||
|
||||
// Write output.
|
||||
{
|
||||
m_pTmpWriter.reset();
|
||||
FileReader reader(m_TmpFileName);
|
||||
ItemIndexPairGreater fGreater(m_Less);
|
||||
PriorityQueue q(fGreater);
|
||||
for (uint32_t i = 0; i < m_ItemCount; i += m_BufferCapacity)
|
||||
Push(q, i, reader);
|
||||
|
||||
while (!q.empty())
|
||||
{
|
||||
m_OutputSink(q.top().first);
|
||||
uint32_t const i = q.top().second + 1;
|
||||
q.pop();
|
||||
if (i % m_BufferCapacity != 0 && i < m_ItemCount)
|
||||
Push(q, i, reader);
|
||||
}
|
||||
}
|
||||
FileWriter::DeleteFileX(m_TmpFileName);
|
||||
}
|
||||
|
||||
~FileSorter()
|
||||
{
|
||||
if (m_pTmpWriter.get())
|
||||
{
|
||||
try
|
||||
{
|
||||
SortAndFinish();
|
||||
}
|
||||
catch (RootException const & e)
|
||||
{
|
||||
LOG(LERROR, (e.Msg()));
|
||||
}
|
||||
catch (std::exception const & e)
|
||||
{
|
||||
LOG(LERROR, (e.what()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
struct ItemIndexPairGreater
|
||||
{
|
||||
explicit ItemIndexPairGreater(LessT fLess) : m_Less(fLess) {}
|
||||
inline bool operator()(std::pair<T, uint32_t> const & a, std::pair<T, uint32_t> const & b) const
|
||||
{
|
||||
return m_Less(b.first, a.first);
|
||||
}
|
||||
LessT m_Less;
|
||||
};
|
||||
|
||||
using PriorityQueue =
|
||||
std::priority_queue<std::pair<T, uint32_t>, std::vector<std::pair<T, uint32_t>>, ItemIndexPairGreater>;
|
||||
|
||||
void FlushToTmpFile()
|
||||
{
|
||||
if (m_Buffer.empty())
|
||||
return;
|
||||
SorterT<LessT> sorter(m_Less);
|
||||
sorter(m_Buffer.begin(), m_Buffer.end());
|
||||
m_pTmpWriter->Write(&m_Buffer[0], m_Buffer.size() * sizeof(T));
|
||||
m_Buffer.clear();
|
||||
}
|
||||
|
||||
void Push(PriorityQueue & q, uint32_t i, FileReader const & reader)
|
||||
{
|
||||
T item;
|
||||
reader.Read(static_cast<uint64_t>(i) * sizeof(T), &item, sizeof(T));
|
||||
q.push(std::pair<T, uint32_t>(item, i));
|
||||
}
|
||||
|
||||
std::string const m_TmpFileName;
|
||||
size_t const m_BufferCapacity;
|
||||
OutputSinkT & m_OutputSink;
|
||||
std::unique_ptr<FileWriter> m_pTmpWriter;
|
||||
std::vector<T> m_Buffer;
|
||||
uint32_t m_ItemCount;
|
||||
LessT m_Less;
|
||||
};
|
||||
49
libs/coding/file_writer.cpp
Normal file
49
libs/coding/file_writer.cpp
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
#include "coding/file_writer.hpp"
|
||||
#include "coding/internal/file_data.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
// Creates the underlying base::FileData for |fileName|; |op| is converted to
// base::FileData::Op by value.
FileWriter::FileWriter(std::string const & fileName, FileWriter::Op op)
  : m_pFileData(std::make_unique<base::FileData>(fileName, static_cast<base::FileData::Op>(op)))
{
}
|
||||
|
||||
// Flushes pending data on destruction. May throw (noexcept(false)).
FileWriter::~FileWriter() noexcept(false)
{
  // A moved-from writer has an empty m_pFileData (the move constructor is defaulted),
  // so there is nothing to flush and dereferencing the pointer would be undefined behaviour.
  if (!m_pFileData)
    return;

  // Note: FileWriter::Flush will be called (like non virtual method).
  Flush();
}
|
||||
|
||||
uint64_t FileWriter::Pos() const
|
||||
{
|
||||
return m_pFileData->Pos();
|
||||
}
|
||||
|
||||
void FileWriter::Seek(uint64_t pos)
|
||||
{
|
||||
m_pFileData->Seek(pos);
|
||||
}
|
||||
|
||||
void FileWriter::Write(void const * p, size_t size)
|
||||
{
|
||||
m_pFileData->Write(p, size);
|
||||
}
|
||||
|
||||
std::string const & FileWriter::GetName() const
|
||||
{
|
||||
return m_pFileData->GetName();
|
||||
}
|
||||
|
||||
uint64_t FileWriter::Size() const
|
||||
{
|
||||
return m_pFileData->Size();
|
||||
}
|
||||
|
||||
void FileWriter::Flush() noexcept(false)
|
||||
{
|
||||
m_pFileData->Flush();
|
||||
}
|
||||
|
||||
void FileWriter::DeleteFileX(std::string const & fName)
|
||||
{
|
||||
UNUSED_VALUE(base::DeleteFileX(fName));
|
||||
}
|
||||
91
libs/coding/file_writer.hpp
Normal file
91
libs/coding/file_writer.hpp
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/internal/file_data.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
// FileWriter, not thread safe.
|
||||
class FileWriter : public Writer
|
||||
{
|
||||
DISALLOW_COPY(FileWriter);
|
||||
|
||||
public:
|
||||
// Values actually match internal FileData::Op enum.
|
||||
enum Op
|
||||
{
|
||||
// Create an empty file for writing. If a file with the same name already exists
|
||||
// its content is erased and the file is treated as a new empty file.
|
||||
OP_WRITE_TRUNCATE = 1,
|
||||
|
||||
// Open a file for update. The file is created if it does not exist.
|
||||
OP_WRITE_EXISTING = 2,
|
||||
|
||||
// Append to a file. Writing operations append data at the end of the file.
|
||||
// The file is created if it does not exist.
|
||||
// Seek should not be called, if file is opened for append.
|
||||
OP_APPEND = 3
|
||||
};
|
||||
|
||||
explicit FileWriter(std::string const & fileName, Op operation = OP_WRITE_TRUNCATE);
|
||||
FileWriter(FileWriter && rhs) = default;
|
||||
|
||||
virtual ~FileWriter() noexcept(false);
|
||||
|
||||
// Writer overrides:
|
||||
void Seek(uint64_t pos) override;
|
||||
uint64_t Pos() const override;
|
||||
void Write(void const * p, size_t size) override;
|
||||
|
||||
virtual uint64_t Size() const;
|
||||
virtual void Flush() noexcept(false);
|
||||
|
||||
std::string const & GetName() const;
|
||||
|
||||
static void DeleteFileX(std::string const & fName);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<base::FileData> m_pFileData;
|
||||
};
|
||||
|
||||
class FilesContainerWriter : public FileWriter
|
||||
{
|
||||
public:
|
||||
FilesContainerWriter(std::string const & fileName, Op operation) : FileWriter(fileName, operation) {}
|
||||
|
||||
void WritePaddingByEnd(size_t factor) { WritePadding(Size(), factor); }
|
||||
void WritePaddingByPos(size_t factor) { WritePadding(Pos(), factor); }
|
||||
|
||||
private:
|
||||
void WritePadding(uint64_t offset, uint64_t factor)
|
||||
{
|
||||
ASSERT_GREATER(factor, 1, ());
|
||||
uint64_t const padding = ((offset + factor - 1) / factor) * factor - offset;
|
||||
if (padding == 0)
|
||||
return;
|
||||
WriteZeroesToSink(*this, padding);
|
||||
}
|
||||
};
|
||||
|
||||
class TruncatingFileWriter : public FilesContainerWriter
|
||||
{
|
||||
public:
|
||||
explicit TruncatingFileWriter(std::string const & fileName)
|
||||
: FilesContainerWriter(fileName, FileWriter::OP_WRITE_EXISTING)
|
||||
{}
|
||||
|
||||
TruncatingFileWriter(TruncatingFileWriter && rhs) = default;
|
||||
|
||||
~TruncatingFileWriter() noexcept(false) override
|
||||
{
|
||||
m_pFileData->Flush();
|
||||
m_pFileData->Truncate(Pos());
|
||||
}
|
||||
};
|
||||
463
libs/coding/files_container.cpp
Normal file
463
libs/coding/files_container.cpp
Normal file
|
|
@ -0,0 +1,463 @@
|
|||
#include "coding/files_container.hpp"
|
||||
|
||||
#include "coding/internal/file_data.hpp"
|
||||
#include "coding/read_write_utils.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
#include "std/windows.hpp"
|
||||
#else
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h> // _SC_PAGESIZE
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
template <typename Source, typename Info>
|
||||
void Read(Source & src, Info & i)
|
||||
{
|
||||
rw::Read(src, i.m_tag);
|
||||
|
||||
i.m_offset = ReadVarUint<uint64_t>(src);
|
||||
i.m_size = ReadVarUint<uint64_t>(src);
|
||||
}
|
||||
|
||||
template <typename Sink, typename Info>
|
||||
void Write(Sink & sink, Info const & i)
|
||||
{
|
||||
rw::Write(sink, i.m_tag);
|
||||
|
||||
WriteVarUint(sink, i.m_offset);
|
||||
WriteVarUint(sink, i.m_size);
|
||||
}
|
||||
|
||||
std::string DebugPrint(FilesContainerBase::TagInfo const & info)
|
||||
{
|
||||
std::ostringstream ss;
|
||||
ss << "{ " << info.m_tag << ", " << info.m_offset << ", " << info.m_size << " }";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// FilesContainerBase
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Reader>
|
||||
void FilesContainerBase::ReadInfo(Reader & reader)
|
||||
{
|
||||
uint64_t offset = ReadPrimitiveFromPos<uint64_t>(reader, 0);
|
||||
|
||||
ReaderSource<Reader> src(reader);
|
||||
src.Skip(offset);
|
||||
|
||||
rw::Read(src, m_info);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// FilesContainerR
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Opens |filePath| with a FileReader configured by |logPageSize| and |logPageCount|
// and loads the section table.
FilesContainerR::FilesContainerR(std::string const & filePath, uint32_t logPageSize, uint32_t logPageCount)
  : m_source(std::make_unique<FileReader>(filePath, logPageSize, logPageCount))
{
  ReadInfo(m_source);
}
|
||||
|
||||
// Uses an already opened reader as the container source and loads the section table.
FilesContainerR::FilesContainerR(TReader const & file) : m_source(file)
{
  ReadInfo(m_source);
}
|
||||
|
||||
// Returns a sub-reader limited to the section |tag|.
// Throws Reader::OpenException when the container has no such section.
FilesContainerR::TReader FilesContainerR::GetReader(Tag const & tag) const
{
  TagInfo const * const info = GetInfo(tag);
  if (info == nullptr)
    MYTHROW(Reader::OpenException, ("Can't find section:", GetFileName(), tag));

  return m_source.SubReader(info->m_offset, info->m_size);
}
|
||||
|
||||
// Returns (offset, size) of the section |tag|. When the source is a FileReader, its own
// offset is added to the section offset.
// Throws Reader::OpenException when the container has no such section.
std::pair<uint64_t, uint64_t> FilesContainerR::GetAbsoluteOffsetAndSize(Tag const & tag) const
{
  TagInfo const * const info = GetInfo(tag);
  if (info == nullptr)
    MYTHROW(Reader::OpenException, ("Can't find section:", GetFileName(), tag));

  uint64_t baseOffset = 0;
  if (auto const * fileReader = dynamic_cast<FileReader const *>(m_source.GetPtr()))
    baseOffset = fileReader->GetOffset();

  return std::make_pair(baseOffset + info->m_offset, info->m_size);
}
|
||||
|
||||
// Binary-searches m_info (using the LessInfo tag ordering) for |tag|.
// Returns a pointer to the descriptor, or null when the tag is absent.
FilesContainerBase::TagInfo const * FilesContainerBase::GetInfo(Tag const & tag) const
{
  auto const it = std::lower_bound(m_info.begin(), m_info.end(), tag, LessInfo());
  bool const found = (it != m_info.end() && it->m_tag == tag);
  if (!found)
    return nullptr;
  return &(*it);
}
|
||||
|
||||
namespace detail
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// MappedFile
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
void MappedFile::Open(std::string const & fName)
|
||||
{
|
||||
Close();
|
||||
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
m_hFile = CreateFileA(fName.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
|
||||
FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, NULL);
|
||||
if (m_hFile == INVALID_HANDLE_VALUE)
|
||||
MYTHROW(Reader::OpenException, ("Can't open file:", fName, "win last error:", GetLastError()));
|
||||
m_hMapping = CreateFileMappingA(m_hFile, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
if (m_hMapping == NULL)
|
||||
MYTHROW(Reader::OpenException, ("Can't create file's Windows mapping:", fName, "win last error:", GetLastError()));
|
||||
#else
|
||||
m_fd = open(fName.c_str(), O_RDONLY | O_NONBLOCK);
|
||||
if (m_fd == -1)
|
||||
{
|
||||
if (errno == EMFILE || errno == ENFILE)
|
||||
MYTHROW(Reader::TooManyFilesException, ("Can't open file:", fName, ", reason:", strerror(errno)));
|
||||
else
|
||||
MYTHROW(Reader::OpenException, ("Can't open file:", fName, ", reason:", strerror(errno)));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void MappedFile::Close()
|
||||
{
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
if (m_hMapping != INVALID_HANDLE_VALUE)
|
||||
{
|
||||
CloseHandle(m_hMapping);
|
||||
m_hMapping = INVALID_HANDLE_VALUE;
|
||||
}
|
||||
if (m_hFile != INVALID_HANDLE_VALUE)
|
||||
{
|
||||
CloseHandle(m_hFile);
|
||||
m_hFile = INVALID_HANDLE_VALUE;
|
||||
}
|
||||
#else
|
||||
if (m_fd != -1)
|
||||
{
|
||||
close(m_fd);
|
||||
m_fd = -1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Maps |size| bytes of the file starting at |offset| for reading and returns a Handle
// that owns the mapping. |tag| is used only in the error message.
// Throws Reader::OpenException when the mapping cannot be created.
MappedFile::Handle MappedFile::Map(uint64_t offset, uint64_t size, std::string const & tag) const
{
  // The mapping is started at a multiple of this value.
#ifdef OMIM_OS_WINDOWS
  SYSTEM_INFO sysInfo;
  memset(&sysInfo, 0, sizeof(sysInfo));
  GetSystemInfo(&sysInfo);
  long const align = sysInfo.dwAllocationGranularity;
#else
  long const align = sysconf(_SC_PAGESIZE);
#endif

  // Round the offset down to the alignment and extend the mapped length by the same amount.
  uint64_t const alignedOffset = (offset / align) * align;
  ASSERT_LESS_OR_EQUAL(alignedOffset, offset, ());
  uint64_t const prefix = offset - alignedOffset;
  uint64_t const length = size + prefix;
  ASSERT_GREATER_OR_EQUAL(length, size, ());

#ifdef OMIM_OS_WINDOWS
  void * pMap =
      MapViewOfFile(m_hMapping, FILE_MAP_READ, alignedOffset >> (sizeof(DWORD) * 8), DWORD(alignedOffset), length);
  if (pMap == NULL)
    MYTHROW(Reader::OpenException,
            ("Can't map section:", tag, "with [offset, size]:", offset, size, "win last error:", GetLastError()));
#else
  void * pMap = mmap(0, static_cast<size_t>(length), PROT_READ, MAP_SHARED, m_fd, static_cast<off_t>(alignedOffset));
  if (pMap == MAP_FAILED)
    MYTHROW(Reader::OpenException,
            ("Can't map section:", tag, "with [offset, size]:", offset, size, "errno:", strerror(errno)));
#endif

  // The handle keeps both the user-visible start and the real start of the mapping.
  char const * const mappingStart = reinterpret_cast<char const *>(pMap);
  char const * const sectionStart = mappingStart + prefix;
  return Handle(sectionStart, mappingStart, size, length);
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// FilesMappingContainer
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Constructs the container and immediately attaches it to |fName|.
FilesMappingContainer::FilesMappingContainer(std::string const & fName)
{
  this->Open(fName);
}
|
||||
|
||||
// Detaches from the file on destruction.
FilesMappingContainer::~FilesMappingContainer()
{
  this->Close();
}
|
||||
|
||||
void FilesMappingContainer::Open(std::string const & fName)
|
||||
{
|
||||
{
|
||||
FileReader reader(fName);
|
||||
ReadInfo(reader);
|
||||
}
|
||||
|
||||
m_file.Open(fName);
|
||||
|
||||
m_name = fName;
|
||||
}
|
||||
|
||||
void FilesMappingContainer::Close()
|
||||
{
|
||||
m_file.Close();
|
||||
|
||||
m_name.clear();
|
||||
}
|
||||
|
||||
// Maps the section |tag| into memory and returns the owning handle.
// Throws Reader::OpenException when the container has no such section.
FilesMappingContainer::Handle FilesMappingContainer::Map(Tag const & tag) const
{
  TagInfo const * const info = GetInfo(tag);
  if (info == nullptr)
    MYTHROW(Reader::OpenException, ("Can't find section:", m_name, tag));

  ASSERT_EQUAL(tag, info->m_tag, ());
  return m_file.Map(info->m_offset, info->m_size, tag);
}
|
||||
|
||||
// Opens the container file with a new FileReader and returns a sub-reader for the section |tag|.
// Throws Reader::OpenException when the container has no such section.
FileReader FilesMappingContainer::GetReader(Tag const & tag) const
{
  TagInfo const * const info = GetInfo(tag);
  if (info == nullptr)
    MYTHROW(Reader::OpenException, ("Can't find section:", m_name, tag));

  return FileReader(m_name).SubReader(info->m_offset, info->m_size);
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// FilesMappingContainer::Handle
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Releases the mapping, if any, when the handle goes away.
detail::MappedFile::Handle::~Handle()
{
  this->Unmap();
}
|
||||
|
||||
// Takes over the mapping owned by |h|: the current mapping (if any) is released first,
// then |h| is left empty.
void FilesMappingContainer::Handle::Assign(Handle && h)
{
  Unmap();

  // Pointers first, then sizes.
  m_base = h.m_base;
  m_origBase = h.m_origBase;

  m_size = h.m_size;
  m_origSize = h.m_origSize;

  // The source must not unmap the region in its own destructor.
  h.Reset();
}
|
||||
|
||||
void FilesMappingContainer::Handle::Unmap()
|
||||
{
|
||||
if (IsValid())
|
||||
{
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
VERIFY(UnmapViewOfFile(m_origBase), ());
|
||||
#else
|
||||
VERIFY(0 == munmap((void *)m_origBase, static_cast<size_t>(m_origSize)), ());
|
||||
#endif
|
||||
Reset();
|
||||
}
|
||||
}
|
||||
|
||||
void FilesMappingContainer::Handle::Reset()
|
||||
{
|
||||
m_base = m_origBase = 0;
|
||||
m_size = m_origSize = 0;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// FilesContainerW
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Remembers the file name and prepares the container according to |op|.
FilesContainerW::FilesContainerW(std::string const & fName, FileWriter::Op op)
  : m_name(fName)
  , m_finished(false)
{
  Open(op);
}
|
||||
|
||||
// Prepares the container for writing.
//  - OP_WRITE_TRUNCATE: nothing is read.
//  - OP_WRITE_EXISTING: the existing section table is loaded and ordered by offset.
//  - anything else is reported with an assertion.
// If the section table is empty afterwards, a new container file is started.
void FilesContainerW::Open(FileWriter::Op op)
{
  m_needRewrite = true;

  if (op == FileWriter::OP_WRITE_EXISTING)
  {
    {
      // read an existing service info
      FileReader reader(m_name);
      ReadInfo(reader);
    }

    // Important: in append mode we should sort info-vector by offsets
    std::sort(m_info.begin(), m_info.end(), LessOffset());

    // Check that all offsets are unique
#ifdef DEBUG
    for (size_t i = 1; i < m_info.size(); ++i)
      ASSERT(m_info[i - 1].m_offset < m_info[i].m_offset || m_info[i - 1].m_size == 0 || m_info[i].m_size == 0, ());
#endif
  }
  else if (op != FileWriter::OP_WRITE_TRUNCATE)
  {
    ASSERT(false, ("Unsupported options"));
  }

  if (m_info.empty())
    StartNew();
}
|
||||
|
||||
void FilesContainerW::StartNew()
|
||||
{
|
||||
// leave space for offset to service info
|
||||
FileWriter writer(m_name);
|
||||
uint64_t skip = 0;
|
||||
writer.Write(&skip, sizeof(skip));
|
||||
m_needRewrite = false;
|
||||
}
|
||||
|
||||
// Writes the section table on destruction unless Finish() has already been called.
FilesContainerW::~FilesContainerW()
{
  if (m_finished)
    return;
  Finish();
}
|
||||
|
||||
uint64_t FilesContainerW::SaveCurrentSize()
|
||||
{
|
||||
ASSERT(!m_finished, ());
|
||||
uint64_t const curr = FileReader(m_name).Size();
|
||||
if (!m_info.empty())
|
||||
m_info.back().m_size = curr - m_info.back().m_offset;
|
||||
return curr;
|
||||
}
|
||||
|
||||
// Removes the section |tag| by copying all other sections into "<name>.tmp", replacing
// the original file with the copy and reloading the section table.
// Throws RootException when the original cannot be deleted or the copy cannot be renamed.
void FilesContainerW::DeleteSection(Tag const & tag)
{
  std::string const tmpName = m_name + ".tmp";

  {
    // rewrite files on disk
    FilesContainerR contR(m_name);
    FilesContainerW contW(tmpName);

    for (TagInfo const & info : m_info)
    {
      if (info.m_tag != tag)
        contW.Write(contR.GetReader(info.m_tag), info.m_tag);
    }
  }

  // swap files
  if (!base::DeleteFileX(m_name) || !base::RenameFileX(tmpName, m_name))
    MYTHROW(RootException, ("Can't rename file", m_name, "Sharing violation or disk error!"));

  // do open to update m_info
  Open(FileWriter::OP_WRITE_EXISTING);
}
|
||||
|
||||
// Returns a writer positioned at the start of a new section |tag|.
// An existing section with the same tag is dropped first: the last section is simply
// removed from the table, any other one is deleted by rewriting the file.
// The section start is padded to kSectionAlignment and recorded in m_info.
std::unique_ptr<FilesContainerWriter> FilesContainerW::GetWriter(Tag const & tag)
{
  ASSERT(!m_finished, ());

  InfoContainer::const_iterator const existing = std::find_if(m_info.begin(), m_info.end(), EqualTag(tag));
  if (existing != m_info.end())
  {
    bool const isLast = (existing + 1 == m_info.end());
    if (!isLast)
    {
      DeleteSection(existing->m_tag);
    }
    else
    {
      m_info.pop_back();

      if (m_info.empty())
        StartNew();
      else
        m_needRewrite = true;
    }
  }

  std::unique_ptr<FilesContainerWriter> writer;
  if (m_needRewrite)
  {
    m_needRewrite = false;

    ASSERT(!m_info.empty(), ());

    // Continue right after the last known section; the truncating writer cuts off the rest.
    uint64_t const curr = m_info.back().m_offset + m_info.back().m_size;
    writer = std::make_unique<TruncatingFileWriter>(m_name);
    writer->Seek(curr);
  }
  else
  {
    SaveCurrentSize();

    writer = std::make_unique<FilesContainerWriter>(m_name, FileWriter::OP_APPEND);
  }

  writer->WritePaddingByPos(kSectionAlignment);

  m_info.emplace_back(tag, writer->Pos());
  ASSERT_EQUAL(m_info.back().m_offset % kSectionAlignment, 0, ());
  return writer;
}
|
||||
|
||||
void FilesContainerW::Write(std::string const & fPath, Tag const & tag)
|
||||
{
|
||||
Write(ModelReaderPtr(std::make_unique<FileReader>(fPath)), tag);
|
||||
}
|
||||
|
||||
// Copies everything available from |reader| into the section |tag|.
void FilesContainerW::Write(ModelReaderPtr reader, Tag const & tag)
{
  ReaderSource<ModelReaderPtr> source(reader);
  std::unique_ptr<FilesContainerWriter> sectionWriter = GetWriter(tag);

  rw::ReadAndWrite(source, *sectionWriter);
}
|
||||
|
||||
void FilesContainerW::Write(void const * buffer, size_t size, Tag const & tag)
|
||||
{
|
||||
if (size != 0)
|
||||
GetWriter(tag)->Write(buffer, size);
|
||||
}
|
||||
|
||||
void FilesContainerW::Write(std::vector<char> const & buffer, Tag const & tag)
|
||||
{
|
||||
Write(buffer.data(), buffer.size(), tag);
|
||||
}
|
||||
|
||||
void FilesContainerW::Write(std::vector<uint8_t> const & buffer, Tag const & tag)
|
||||
{
|
||||
Write(buffer.data(), buffer.size(), tag);
|
||||
}
|
||||
|
||||
void FilesContainerW::Finish()
|
||||
{
|
||||
ASSERT(!m_finished, ());
|
||||
|
||||
uint64_t const curr = SaveCurrentSize();
|
||||
|
||||
FileWriter writer(m_name, FileWriter::OP_WRITE_EXISTING);
|
||||
writer.Seek(0);
|
||||
WriteToSink(writer, curr);
|
||||
writer.Seek(curr);
|
||||
|
||||
sort(m_info.begin(), m_info.end(), LessInfo());
|
||||
|
||||
rw::Write(writer, m_info);
|
||||
|
||||
m_finished = true;
|
||||
}
|
||||
253
libs/coding/files_container.hpp
Normal file
253
libs/coding/files_container.hpp
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/file_reader.hpp"
|
||||
#include "coding/file_writer.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
/// Common part of the file containers: the table of section descriptors
/// (tag, offset, size) together with lookup helpers and comparators.
class FilesContainerBase
{
public:
  using Tag = std::string;

  /// Descriptor of one section of the container.
  struct TagInfo
  {
    TagInfo() = default;
    TagInfo(Tag const & tag, uint64_t offset) : m_tag(tag), m_offset(offset) {}

    Tag m_tag;
    uint64_t m_offset = 0;
    uint64_t m_size = 0;
  };

  /// Alignment of each new section that will be added to a file
  /// container, i.e. section's offset in bytes will be a multiple of
  /// this value.
  ///
  /// WARNING! Existing sections may not be properly aligned.
  static uint64_t constexpr kSectionAlignment = 8;

  /// @return true when a section with the given tag is present in the table.
  bool IsExist(Tag const & tag) const { return GetInfo(tag) != nullptr; }

  /// Calls |toDo| for every section descriptor in table order.
  template <typename ToDo>
  void ForEachTagInfo(ToDo && toDo) const
  {
    std::for_each(m_info.begin(), m_info.end(), std::forward<ToDo>(toDo));
  }

protected:
  /// Orders descriptors by tag; also compares a descriptor with a bare tag.
  struct LessInfo
  {
    bool operator()(TagInfo const & t1, TagInfo const & t2) const { return t1.m_tag < t2.m_tag; }
    bool operator()(TagInfo const & t1, Tag const & t2) const { return t1.m_tag < t2; }
    bool operator()(Tag const & t1, TagInfo const & t2) const { return t1 < t2.m_tag; }
  };

  /// Orders descriptors by offset; also compares a descriptor with a bare offset.
  struct LessOffset
  {
    bool operator()(TagInfo const & t1, TagInfo const & t2) const
    {
      if (t1.m_offset != t2.m_offset)
        return t1.m_offset < t2.m_offset;

      // Element with nonzero size should be the last one,
      // for correct append writer mode (FilesContainerW::GetWriter).
      return t1.m_size < t2.m_size;
    }
    bool operator()(TagInfo const & t1, uint64_t const & t2) const { return t1.m_offset < t2; }
    bool operator()(uint64_t const & t1, TagInfo const & t2) const { return t1 < t2.m_offset; }
  };

  /// Predicate matching descriptors with a given tag.
  /// Keeps a reference to the tag, so the tag must outlive the predicate.
  class EqualTag
  {
  public:
    EqualTag(Tag const & tag) : m_tag(tag) {}
    bool operator()(TagInfo const & t) const { return t.m_tag == m_tag; }

  private:
    Tag const & m_tag;
  };

  /// @return Descriptor of the section |tag| or null when there is no such section.
  TagInfo const * GetInfo(Tag const & tag) const;

  /// Loads the section table from |reader| into m_info.
  template <typename Reader>
  void ReadInfo(Reader & reader);

  using InfoContainer = std::vector<TagInfo>;
  InfoContainer m_info;
};
|
||||
|
||||
std::string DebugPrint(FilesContainerBase::TagInfo const & info);
|
||||
|
||||
class FilesContainerR : public FilesContainerBase
|
||||
{
|
||||
public:
|
||||
using TReader = ModelReaderPtr;
|
||||
|
||||
explicit FilesContainerR(std::string const & filePath, uint32_t logPageSize = 10, uint32_t logPageCount = 10);
|
||||
explicit FilesContainerR(TReader const & file);
|
||||
|
||||
TReader GetReader(Tag const & tag) const;
|
||||
|
||||
template <typename F>
|
||||
void ForEachTag(F && f) const
|
||||
{
|
||||
for (size_t i = 0; i < m_info.size(); ++i)
|
||||
f(m_info[i].m_tag);
|
||||
}
|
||||
|
||||
uint64_t GetFileSize() const { return m_source.Size(); }
|
||||
std::string const & GetFileName() const { return m_source.GetName(); }
|
||||
|
||||
std::pair<uint64_t, uint64_t> GetAbsoluteOffsetAndSize(Tag const & tag) const;
|
||||
|
||||
private:
|
||||
TReader m_source;
|
||||
};
|
||||
|
||||
namespace detail
|
||||
{
|
||||
class MappedFile
|
||||
{
|
||||
public:
|
||||
MappedFile() = default;
|
||||
~MappedFile() { Close(); }
|
||||
|
||||
void Open(std::string const & fName);
|
||||
void Close();
|
||||
|
||||
class Handle
|
||||
{
|
||||
public:
|
||||
Handle() = default;
|
||||
|
||||
Handle(char const * base, char const * alignBase, uint64_t size, uint64_t origSize)
|
||||
: m_base(base)
|
||||
, m_origBase(alignBase)
|
||||
, m_size(size)
|
||||
, m_origSize(origSize)
|
||||
{}
|
||||
|
||||
Handle(Handle && h) { Assign(std::move(h)); }
|
||||
|
||||
Handle & operator=(Handle && h)
|
||||
{
|
||||
Assign(std::move(h));
|
||||
return *this;
|
||||
}
|
||||
|
||||
~Handle();
|
||||
|
||||
void Assign(Handle && h);
|
||||
|
||||
void Unmap();
|
||||
|
||||
bool IsValid() const { return (m_base != 0); }
|
||||
uint64_t GetSize() const { return m_size; }
|
||||
|
||||
template <typename T>
|
||||
T const * GetData() const
|
||||
{
|
||||
ASSERT_EQUAL(m_size % sizeof(T), 0, ());
|
||||
return reinterpret_cast<T const *>(m_base);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t GetDataCount() const
|
||||
{
|
||||
ASSERT_EQUAL(m_size % sizeof(T), 0, ());
|
||||
return (m_size / sizeof(T));
|
||||
}
|
||||
|
||||
private:
|
||||
void Reset();
|
||||
|
||||
char const * m_base = nullptr;
|
||||
char const * m_origBase = nullptr;
|
||||
uint64_t m_size = 0;
|
||||
uint64_t m_origSize = 0;
|
||||
|
||||
DISALLOW_COPY(Handle);
|
||||
};
|
||||
|
||||
Handle Map(uint64_t offset, uint64_t size, std::string const & tag) const;
|
||||
|
||||
private:
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
void * m_hFile = (void *)-1;
|
||||
void * m_hMapping = (void *)-1;
|
||||
#else
|
||||
int m_fd = -1;
|
||||
#endif
|
||||
|
||||
DISALLOW_COPY(MappedFile);
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
class FilesMappingContainer : public FilesContainerBase
|
||||
{
|
||||
public:
|
||||
using Handle = detail::MappedFile::Handle;
|
||||
|
||||
/// Do nothing by default, call Open to attach to file.
|
||||
FilesMappingContainer() = default;
|
||||
explicit FilesMappingContainer(std::string const & fName);
|
||||
|
||||
~FilesMappingContainer();
|
||||
|
||||
void Open(std::string const & fName);
|
||||
void Close();
|
||||
|
||||
Handle Map(Tag const & tag) const;
|
||||
FileReader GetReader(Tag const & tag) const;
|
||||
|
||||
std::string const & GetName() const { return m_name; }
|
||||
|
||||
private:
|
||||
std::string m_name;
|
||||
detail::MappedFile m_file;
|
||||
};
|
||||
|
||||
class FilesContainerW : public FilesContainerBase
|
||||
{
|
||||
public:
|
||||
FilesContainerW(std::string const & fName, FileWriter::Op op = FileWriter::OP_WRITE_TRUNCATE);
|
||||
~FilesContainerW();
|
||||
|
||||
std::unique_ptr<FilesContainerWriter> GetWriter(Tag const & tag);
|
||||
|
||||
void Write(std::string const & fPath, Tag const & tag);
|
||||
void Write(ModelReaderPtr reader, Tag const & tag);
|
||||
void Write(void const * buffer, size_t size, Tag const & tag);
|
||||
void Write(std::vector<char> const & buffer, Tag const & tag);
|
||||
void Write(std::vector<uint8_t> const & buffer, Tag const & tag);
|
||||
|
||||
void Finish();
|
||||
|
||||
/// Delete section with rewriting file.
|
||||
/// @precondition Container should be opened with FileWriter::OP_WRITE_EXISTING.
|
||||
void DeleteSection(Tag const & tag);
|
||||
|
||||
std::string const & GetFileName() const { return m_name; }
|
||||
|
||||
private:
|
||||
uint64_t SaveCurrentSize();
|
||||
|
||||
void Open(FileWriter::Op op);
|
||||
void StartNew();
|
||||
|
||||
std::string m_name;
|
||||
bool m_needRewrite;
|
||||
bool m_finished;
|
||||
};
|
||||
179
libs/coding/fixed_bits_ddvector.hpp
Normal file
179
libs/coding/fixed_bits_ddvector.hpp
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
#pragma once
|
||||
|
||||
#include "bit_streams.hpp"
|
||||
#include "byte_stream.hpp"
|
||||
#include "dd_vector.hpp"
|
||||
#include "reader.hpp"
|
||||
#include "write_to_sink.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
/// Disk driven vector for optimal storing small values with rare big values.
|
||||
/// Format:
|
||||
/// 4 bytes to store vector's size
|
||||
/// Buffer of ceil(Size * Bits / 8) bytes, e.g. vector of Bits-sized elements.
|
||||
/// - values in range [0, (1 << Bits) - 3] stored as is
|
||||
/// - value (1 << Bits) - 2 tells that actual value is stored in the exceptions table below.
|
||||
/// - value (1 << Bits) - 1 tells that the value is undefined.
|
||||
/// Buffer with exceptions table, e.g. vector of (index, value) pairs till the end of the reader,
|
||||
/// sorted by index parameter.
|
||||
/// Component is stored and used in host's endianness, without any conversions.
|
||||
|
||||
template <size_t Bits, /// number of fixed bits
|
||||
class TReader, /// reader with random offset read functions
|
||||
typename TSize = uint32_t, /// vector index type (platform independent)
|
||||
typename TValue = uint32_t /// vector value type (platform independent)
|
||||
>
|
||||
class FixedBitsDDVector
|
||||
{
|
||||
static_assert(std::is_unsigned<TSize>::value, "");
|
||||
static_assert(std::is_unsigned<TValue>::value, "");
|
||||
// 16 - is the maximum bits count to get all needed bits in random access within uint32_t.
|
||||
static_assert(Bits > 0, "");
|
||||
static_assert(Bits <= 16, "");
|
||||
|
||||
using TSelf = FixedBitsDDVector<Bits, TReader, TSize, TValue>;
|
||||
|
||||
struct IndexValue
|
||||
{
|
||||
TSize m_index;
|
||||
TValue m_value;
|
||||
bool operator<(IndexValue const & rhs) const { return m_index < rhs.m_index; }
|
||||
};
|
||||
|
||||
TReader m_bits;
|
||||
DDVector<IndexValue, TReader, TSize> m_vector;
|
||||
|
||||
#ifdef DEBUG
|
||||
TSize const m_size;
|
||||
#endif
|
||||
|
||||
using TBlock = uint32_t;
|
||||
|
||||
static uint64_t AlignBytesCount(uint64_t count) { return std::max(count, static_cast<uint64_t>(sizeof(TBlock))); }
|
||||
|
||||
static TBlock constexpr kMask = (1 << Bits) - 1;
|
||||
static TBlock constexpr kLargeValue = kMask - 1;
|
||||
static TBlock constexpr kUndefined = kMask;
|
||||
|
||||
TValue FindInVector(TSize index) const
|
||||
{
|
||||
auto const it = std::lower_bound(m_vector.begin(), m_vector.end(), IndexValue{index, 0});
|
||||
ASSERT(it != m_vector.end() && it->m_index == index, ());
|
||||
return it->m_value;
|
||||
}
|
||||
|
||||
FixedBitsDDVector(TReader const & bitsReader, TReader const & vecReader, TSize size)
|
||||
: m_bits(bitsReader)
|
||||
, m_vector(vecReader)
|
||||
#ifdef DEBUG
|
||||
, m_size(size)
|
||||
#endif
|
||||
{}
|
||||
|
||||
public:
|
||||
static std::unique_ptr<TSelf> Create(TReader const & reader)
|
||||
{
|
||||
TSize const size = ReadPrimitiveFromPos<TSize>(reader, 0);
|
||||
|
||||
uint64_t const off1 = sizeof(TSize);
|
||||
uint64_t const off2 = AlignBytesCount((size * Bits + CHAR_BIT - 1) / CHAR_BIT) + off1;
|
||||
|
||||
// We cannot use make_unique here because contsructor is private.
|
||||
return std::unique_ptr<TSelf>(
|
||||
new TSelf(reader.SubReader(off1, off2 - off1), reader.SubReader(off2, reader.Size() - off2), size));
|
||||
}
|
||||
|
||||
bool Get(TSize index, TValue & value) const
|
||||
{
|
||||
ASSERT_LESS(index, m_size, ());
|
||||
uint64_t const bitsOffset = index * Bits;
|
||||
|
||||
uint64_t bytesOffset = bitsOffset / CHAR_BIT;
|
||||
size_t constexpr kBlockSize = sizeof(TBlock);
|
||||
if (bytesOffset + kBlockSize > m_bits.Size())
|
||||
bytesOffset = m_bits.Size() - kBlockSize;
|
||||
|
||||
TBlock v = ReadPrimitiveFromPos<TBlock>(m_bits, bytesOffset);
|
||||
v >>= (bitsOffset - bytesOffset * CHAR_BIT);
|
||||
v &= kMask;
|
||||
if (v == kUndefined)
|
||||
return false;
|
||||
|
||||
value = v < kLargeValue ? v : FindInVector(index);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class TWriter>
|
||||
class Builder
|
||||
{
|
||||
using TData = std::vector<uint8_t>;
|
||||
using TempWriter = PushBackByteSink<TData>;
|
||||
using TBits = BitWriter<TempWriter>;
|
||||
|
||||
TData m_data;
|
||||
TempWriter m_writer;
|
||||
std::unique_ptr<TBits> m_bits;
|
||||
|
||||
std::vector<IndexValue> m_excepts;
|
||||
TSize m_count = 0;
|
||||
TSize m_optCount = 0;
|
||||
|
||||
TWriter & m_finalWriter;
|
||||
|
||||
public:
|
||||
using ValueType = TValue;
|
||||
|
||||
explicit Builder(TWriter & writer) : m_writer(m_data), m_bits(new TBits(m_writer)), m_finalWriter(writer) {}
|
||||
|
||||
~Builder()
|
||||
{
|
||||
// Final serialization is in dtor only.
|
||||
// You can't do any intermediate flushes during building vector.
|
||||
|
||||
// Reset the bit stream first.
|
||||
m_bits.reset();
|
||||
|
||||
// Write size of vector.
|
||||
WriteToSink(m_finalWriter, m_count);
|
||||
|
||||
// Write bits vector, alignes at least to 4 bytes.
|
||||
m_data.resize(AlignBytesCount(m_data.size()));
|
||||
m_finalWriter.Write(m_data.data(), m_data.size());
|
||||
|
||||
// Write exceptions table.
|
||||
m_finalWriter.Write(m_excepts.data(), m_excepts.size() * sizeof(IndexValue));
|
||||
}
|
||||
|
||||
void PushBack(TValue v)
|
||||
{
|
||||
if (v >= kLargeValue)
|
||||
{
|
||||
m_bits->WriteAtMost32Bits(kLargeValue, Bits);
|
||||
m_excepts.push_back({m_count, v});
|
||||
}
|
||||
else
|
||||
{
|
||||
++m_optCount;
|
||||
m_bits->WriteAtMost32Bits(v, Bits);
|
||||
}
|
||||
|
||||
++m_count;
|
||||
}
|
||||
|
||||
// Pushes a special (undefined) value.
|
||||
void PushBackUndefined()
|
||||
{
|
||||
m_bits->WriteAtMost32Bits(kUndefined, Bits);
|
||||
++m_optCount;
|
||||
++m_count;
|
||||
}
|
||||
|
||||
/// @return (number of stored as-is elements, number of all elements)
|
||||
std::pair<TSize, TSize> GetCount() const { return std::make_pair(m_optCount, m_count); }
|
||||
};
|
||||
};
|
||||
516
libs/coding/geometry_coding.cpp
Normal file
516
libs/coding/geometry_coding.cpp
Normal file
|
|
@ -0,0 +1,516 @@
|
|||
#include "coding/geometry_coding.hpp"
|
||||
|
||||
#include "coding/point_coding.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include <complex>
|
||||
#include <stack>
|
||||
|
||||
namespace
|
||||
{
|
||||
// Clamps every coordinate of |point| to [0, maxPoint] and converts the result to unsigned coordinates.
inline m2::PointU ClampPoint(m2::PointD const & maxPoint, m2::PointD const & point)
{
  using TCoord = m2::PointU::value_type;
  auto const x = static_cast<TCoord>(math::Clamp(point.x, 0.0, maxPoint.x));
  auto const y = static_cast<TCoord>(math::Clamp(point.y, 0.0, maxPoint.y));
  return {x, y};
}
|
||||
|
||||
struct edge_less_p0
|
||||
{
|
||||
using edge_t = tesselator::Edge;
|
||||
|
||||
bool operator()(edge_t const & e1, edge_t const & e2) const
|
||||
{
|
||||
return (e1.m_p[0] == e2.m_p[0]) ? (e1.m_side < e2.m_side) : (e1.m_p[0] < e2.m_p[0]);
|
||||
}
|
||||
bool operator()(edge_t const & e1, int e2) const { return e1.m_p[0] < e2; }
|
||||
bool operator()(int e1, edge_t const & e2) const { return e1 < e2.m_p[0]; }
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace coding
|
||||
{
|
||||
// Debug helper: decodes |deltas| with |fnDecode| and compares the result with |points| element by element.
// Mismatches are reported by ASSERT_EQUAL; the function itself always returns true,
// so it can be wrapped into ASSERT().
bool TestDecoding(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                  OutDeltasT const & deltas,
                  void (*fnDecode)(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                                   OutPointsT & points))
{
  size_t const count = points.size();

  // Pre-sized storage for the decoder output.
  std::vector<m2::PointU> decoded(count);
  OutPointsT decodedA(decoded);

  fnDecode(make_read_adapter(deltas), basePoint, maxPoint, decodedA);

  for (size_t i = 0; i != count; ++i)
    ASSERT_EQUAL(points[i], decoded[i], ());

  return true;
}
|
||||
|
||||
// Linear prediction: continues from |p1| by half of the last step (p1 - p2), then clamps to |maxPoint|.
m2::PointU PredictPointInPolyline(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2)
{
  m2::PointD const last(p1);
  m2::PointD const halfStep = (last - m2::PointD(p2)) / 2.0;
  return ClampPoint(maxPoint, last + halfStep);
}
|
||||
|
||||
// Packs (actual - prediction) into one uint64_t: per-coordinate signed differences are
// zigzag-encoded and then combined by bits::BitwiseMerge.
uint64_t EncodePointDeltaAsUint(m2::PointU const & actual, m2::PointU const & prediction)
{
  auto const dx = static_cast<int32_t>(actual.x) - static_cast<int32_t>(prediction.x);
  auto const dy = static_cast<int32_t>(actual.y) - static_cast<int32_t>(prediction.y);
  return bits::BitwiseMerge(bits::ZigZagEncode(dx), bits::ZigZagEncode(dy));
}
|
||||
|
||||
// Inverse of EncodePointDeltaAsUint: splits |delta| into two zigzag-encoded differences
// and applies them to |prediction|.
m2::PointU DecodePointDeltaFromUint(uint64_t delta, m2::PointU const & prediction)
{
  uint32_t encodedX = 0;
  uint32_t encodedY = 0;
  bits::BitwiseSplit(delta, encodedX, encodedY);

  auto const x = prediction.x + bits::ZigZagDecode(encodedX);
  auto const y = prediction.y + bits::ZigZagDecode(encodedY);
  return m2::PointU(x, y);
}
|
||||
|
||||
// Prediction by three previous points, computed in the complex plane: the last step (p1 - p2)
// is halved, rotated by half of the angle between the last two steps and added to |p1|.
// |p2| and |p3| must differ, otherwise the previous step is zero and the division is meaningless.
m2::PointU PredictPointInPolyline(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2,
                                  m2::PointU const & p3)
{
  CHECK_NOT_EQUAL(p2, p3, ());

  using TComplex = std::complex<double>;
  TComplex const c1(p1.x, p1.y);
  TComplex const c2(p2.x, p2.y);
  TComplex const c3(p3.x, p3.y);

  // Ratio of the last step to the previous one; its argument is the turn angle.
  TComplex const d = (c1 - c2) / (c2 - c3);
  TComplex const c0 = c1 + (c1 - c2) * std::polar(0.5, 0.5 * arg(d));

  /*
  Alternative kept for reference: the average of the "full turn" and the "no turn" predictions.

  complex<double> const c1(p1.x, p1.y);
  complex<double> const c2(p2.x, p2.y);
  complex<double> const c3(p3.x, p3.y);
  complex<double> const d = (c1 - c2) / (c2 - c3);
  complex<double> const c01 = c1 + (c1 - c2) * polar(0.5, arg(d));
  complex<double> const c02 = c1 + (c1 - c2) * complex<double>(0.5, 0.0);
  complex<double> const c0 = (c01 + c02) * complex<double>(0.5, 0.0);
  */

  return ClampPoint(maxPoint, {c0.real(), c0.imag()});
}
|
||||
|
||||
// Parallelogram prediction: p1 + p2 - p3, clamped to |maxPoint|.
m2::PointU PredictPointInTriangle(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2,
                                  m2::PointU const & p3)
{
  // The sum is intentionally taken in unsigned coordinates first, exactly as the format expects.
  auto const edgeSum = p1 + p2;
  return ClampPoint(maxPoint, m2::PointD(edgeSum) - m2::PointD(p3));
}
|
||||
|
||||
// Encodes every point as a delta from the previous one; the first point is a delta from |basePoint|.
void EncodePolylinePrev1(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutDeltasT & deltas)
{
  size_t const n = points.size();
  for (size_t i = 0; i < n; ++i)
  {
    m2::PointU const prediction = (i == 0) ? basePoint : points[i - 1];
    deltas.push_back(EncodePointDeltaAsUint(points[i], prediction));
  }

  // Debug-only round-trip check.
  ASSERT(TestDecoding(points, basePoint, maxPoint, deltas, &DecodePolylinePrev1), ());
}
|
||||
|
||||
// Inverse of EncodePolylinePrev1: every delta is applied to the previously decoded point,
// the first one - to |basePoint|. The max point is not needed by this scheme.
void DecodePolylinePrev1(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & /*maxPoint*/,
                         OutPointsT & points)
{
  size_t const total = deltas.size();
  if (total == 0)
    return;

  points.push_back(DecodePointDeltaFromUint(deltas[0], basePoint));
  for (size_t idx = 1; idx < total; ++idx)
    points.push_back(DecodePointDeltaFromUint(deltas[idx], points.back()));
}
|
||||
|
||||
// Encodes points as deltas from predictions:
//   point 0  - from |basePoint|;
//   point 1  - from point 0;
//   point i  - from the two-point prediction built on points i-1 and i-2.
void EncodePolylinePrev2(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutDeltasT & deltas)
{
  size_t const n = points.size();

  if (n >= 1)
    deltas.push_back(EncodePointDeltaAsUint(points[0], basePoint));

  if (n >= 2)
    deltas.push_back(EncodePointDeltaAsUint(points[1], points[0]));

  if (n >= 3)
  {
    m2::PointD const maxPointD(maxPoint);
    for (size_t i = 2; i < n; ++i)
    {
      m2::PointU const prediction = PredictPointInPolyline(maxPointD, points[i - 1], points[i - 2]);
      deltas.push_back(EncodePointDeltaAsUint(points[i], prediction));
    }
  }

  // Debug-only round-trip check.
  ASSERT(TestDecoding(points, basePoint, maxPoint, deltas, &DecodePolylinePrev2), ());
}
|
||||
|
||||
// Inverse of EncodePolylinePrev2: rebuilds the same predictions from already decoded points.
void DecodePolylinePrev2(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutPointsT & points)
{
  size_t const total = deltas.size();
  if (total == 0)
    return;

  points.push_back(DecodePointDeltaFromUint(deltas[0], basePoint));
  if (total == 1)
    return;

  points.push_back(DecodePointDeltaFromUint(deltas[1], points.back()));

  m2::PointD const maxPointD(maxPoint);
  for (size_t idx = 2; idx < total; ++idx)
  {
    // Predict from the two most recently decoded points.
    size_t const n = points.size();
    m2::PointU const prediction = PredictPointInPolyline(maxPointD, points[n - 1], points[n - 2]);
    points.push_back(DecodePointDeltaFromUint(deltas[idx], prediction));
  }
}
|
||||
|
||||
// Encodes points as deltas from predictions:
//   point 0  - from |basePoint|;
//   point 1  - from point 0;
//   point 2  - from the two-point prediction on points 1 and 0;
//   point i  - from the three-point prediction on points i-1, i-2 and i-3.
void EncodePolylinePrev3(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutDeltasT & deltas)
{
  ASSERT_LESS_OR_EQUAL(basePoint.x, maxPoint.x, (basePoint, maxPoint));
  ASSERT_LESS_OR_EQUAL(basePoint.y, maxPoint.y, (basePoint, maxPoint));

  size_t const n = points.size();

  if (n >= 1)
    deltas.push_back(EncodePointDeltaAsUint(points[0], basePoint));

  if (n >= 2)
    deltas.push_back(EncodePointDeltaAsUint(points[1], points[0]));

  if (n >= 3)
  {
    m2::PointD const maxPointD(maxPoint);

    m2::PointU const second = PredictPointInPolyline(maxPointD, points[1], points[0]);
    deltas.push_back(EncodePointDeltaAsUint(points[2], second));

    for (size_t i = 3; i < n; ++i)
    {
      m2::PointU const next = PredictPointInPolyline(maxPointD, points[i - 1], points[i - 2], points[i - 3]);
      deltas.push_back(EncodePointDeltaAsUint(points[i], next));
    }
  }

  // Debug-only round-trip check.
  ASSERT(TestDecoding(points, basePoint, maxPoint, deltas, &DecodePolylinePrev3), ());
}
|
||||
|
||||
// Inverse of EncodePolylinePrev3: rebuilds the same predictions from already decoded points.
void DecodePolylinePrev3(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutPointsT & points)
{
  ASSERT_LESS_OR_EQUAL(basePoint.x, maxPoint.x, (basePoint, maxPoint));
  ASSERT_LESS_OR_EQUAL(basePoint.y, maxPoint.y, (basePoint, maxPoint));

  size_t const total = deltas.size();
  if (total == 0)
    return;

  points.push_back(DecodePointDeltaFromUint(deltas[0], basePoint));
  if (total == 1)
    return;

  // Remember the first decoded point: it takes part in the two-point prediction below.
  m2::PointU const first = points.back();
  points.push_back(DecodePointDeltaFromUint(deltas[1], first));
  if (total == 2)
    return;

  m2::PointD const maxPointD(maxPoint);
  points.push_back(DecodePointDeltaFromUint(deltas[2], PredictPointInPolyline(maxPointD, points.back(), first)));

  for (size_t idx = 3; idx < total; ++idx)
  {
    size_t const n = points.size();
    m2::PointU const prediction = PredictPointInPolyline(maxPointD, points[n - 1], points[n - 2], points[n - 3]);
    points.push_back(DecodePointDeltaFromUint(deltas[idx], prediction));
  }
}
|
||||
|
||||
// Default polyline encoder; currently the "previous two points" scheme.
// Must stay in sync with DecodePolyline.
void EncodePolyline(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                    OutDeltasT & deltas)
{
  return EncodePolylinePrev2(points, basePoint, maxPoint, deltas);
}
|
||||
|
||||
// Default polyline decoder; currently the "previous two points" scheme.
// Must stay in sync with EncodePolyline.
void DecodePolyline(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                    OutPointsT & points)
{
  return DecodePolylinePrev2(deltas, basePoint, maxPoint, points);
}
|
||||
|
||||
// Encodes a triangle strip: the first three points are chained deltas (starting from |basePoint|),
// every following point is a delta from the parallelogram prediction on the three previous points.
// A non-empty strip is expected to have at least 3 points (checked by ASSERT only).
void EncodeTriangleStrip(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutDeltasT & deltas)
{
  size_t const count = points.size();
  if (count == 0)
    return;

  ASSERT_GREATER(count, 2, ());

  deltas.push_back(EncodePointDeltaAsUint(points[0], basePoint));
  deltas.push_back(EncodePointDeltaAsUint(points[1], points[0]));
  deltas.push_back(EncodePointDeltaAsUint(points[2], points[1]));

  m2::PointD const maxPointD(maxPoint);
  for (size_t idx = 3; idx < count; ++idx)
  {
    m2::PointU const predicted =
        PredictPointInTriangle(maxPointD, points[idx - 1], points[idx - 2], points[idx - 3]);
    deltas.push_back(EncodePointDeltaAsUint(points[idx], predicted));
  }
}
|
||||
|
||||
// Inverse of EncodeTriangleStrip.
// A non-empty input is expected to have at least 3 deltas (checked by ASSERT only).
void DecodeTriangleStrip(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutPointsT & points)
{
  size_t const count = deltas.size();
  if (count == 0)
    return;

  ASSERT_GREATER(count, 2, ());

  points.push_back(DecodePointDeltaFromUint(deltas[0], basePoint));
  points.push_back(DecodePointDeltaFromUint(deltas[1], points.back()));
  points.push_back(DecodePointDeltaFromUint(deltas[2], points.back()));

  m2::PointD const maxPointD(maxPoint);
  for (size_t idx = 3; idx < count; ++idx)
  {
    // Predict from the three most recently decoded points.
    size_t const n = points.size();
    m2::PointU const predicted = PredictPointInTriangle(maxPointD, points[n - 1], points[n - 2], points[n - 3]);
    points.push_back(DecodePointDeltaFromUint(deltas[idx], predicted));
  }
}
|
||||
} // namespace coding
|
||||
|
||||
namespace serial
|
||||
{
|
||||
// GeometryCodingParams ----------------------------------------------------------------------------
|
||||
GeometryCodingParams::GeometryCodingParams() : m_BasePointUint64(0), m_CoordBits(kPointCoordBits)
|
||||
{
|
||||
m_BasePoint = Uint64ToPointUObsolete(m_BasePointUint64);
|
||||
}
|
||||
|
||||
GeometryCodingParams::GeometryCodingParams(uint8_t coordBits, m2::PointD const & pt) : m_CoordBits(coordBits)
|
||||
{
|
||||
SetBasePoint(pt);
|
||||
}
|
||||
|
||||
GeometryCodingParams::GeometryCodingParams(uint8_t coordBits, uint64_t basePointUint64)
|
||||
: m_BasePointUint64(basePointUint64)
|
||||
, m_CoordBits(coordBits)
|
||||
{
|
||||
m_BasePoint = Uint64ToPointUObsolete(m_BasePointUint64);
|
||||
}
|
||||
|
||||
// Converts |pt| to unsigned coordinates with the current bits count and
// stores it both as a point and in the packed uint64 form.
void GeometryCodingParams::SetBasePoint(m2::PointD const & pt)
{
  auto const basePoint = PointDToPointU(pt, m_CoordBits);
  m_BasePoint = basePoint;
  m_BasePointUint64 = PointUToUint64Obsolete(basePoint);
}
|
||||
|
||||
namespace pts
|
||||
{
|
||||
m2::PointU D2U(m2::PointD const & p, uint32_t coordBits)
|
||||
{
|
||||
return PointDToPointU(p, coordBits);
|
||||
}
|
||||
|
||||
m2::PointD U2D(m2::PointU const & p, uint32_t coordBits)
|
||||
{
|
||||
m2::PointD const pt = PointUToPointD(p, coordBits);
|
||||
ASSERT(mercator::Bounds::kMinX <= pt.x && pt.y <= mercator::Bounds::kMaxX, (p, pt, coordBits));
|
||||
ASSERT(mercator::Bounds::kMinY <= pt.x && pt.y <= mercator::Bounds::kMaxY, (p, pt, coordBits));
|
||||
return pt;
|
||||
}
|
||||
|
||||
m2::PointU GetMaxPoint(GeometryCodingParams const & params)
|
||||
{
|
||||
return D2U(m2::PointD(mercator::Bounds::kMaxX, mercator::Bounds::kMaxY), params.GetCoordBits());
|
||||
}
|
||||
|
||||
m2::PointU GetBasePoint(GeometryCodingParams const & params)
|
||||
{
|
||||
return params.GetBasePoint();
|
||||
}
|
||||
} // namespace pts
|
||||
|
||||
// Converts |points| to unsigned coordinates and encodes them with |fn| into |deltas|.
// |deltas| must be empty on entry; it is resized to the points count and filled through an adapter.
void Encode(EncodeFunT fn, std::vector<m2::PointD> const & points, GeometryCodingParams const & params,
            DeltasT & deltas)
{
  size_t const count = points.size();
  auto const coordBits = params.GetCoordBits();

  pts::PointsU upoints;
  upoints.reserve(count);
  for (m2::PointD const & pt : points)
    upoints.push_back(pts::D2U(pt, coordBits));

  ASSERT(deltas.empty(), ());
  deltas.resize(count);

  coding::OutDeltasT adapt(deltas);
  (*fn)(make_read_adapter(upoints), pts::GetBasePoint(params), pts::GetMaxPoint(params), adapt);
}
|
||||
|
||||
void Decode(DecodeFunT fn, DeltasT const & deltas, GeometryCodingParams const & params, OutPointsT & points,
|
||||
size_t reserveF)
|
||||
{
|
||||
DecodeImpl(fn, deltas, params, points, reserveF);
|
||||
}
|
||||
|
||||
void Decode(DecodeFunT fn, DeltasT const & deltas, GeometryCodingParams const & params,
|
||||
std::vector<m2::PointD> & points, size_t reserveF)
|
||||
{
|
||||
DecodeImpl(fn, deltas, params, points, reserveF);
|
||||
}
|
||||
|
||||
// Reads |count| varuint deltas starting at |pBeg|, decodes them with |fn| and appends the result to |points|.
// @return the pointer returned by ReadVarUint64Array (presumably the first byte after the consumed data).
void const * LoadInner(DecodeFunT fn, void const * pBeg, size_t count, GeometryCodingParams const & params,
                       OutPointsT & points)
{
  DeltasT deltas;
  deltas.reserve(count);

  auto const * begin = static_cast<char const *>(pBeg);
  void const * end = ReadVarUint64Array(begin, count, base::MakeBackInsertFunctor(deltas));

  Decode(fn, deltas, params, points);
  return end;
}
|
||||
|
||||
// Caches the base and the maximum points (in unsigned coordinates) derived from |params|.
TrianglesChainSaver::TrianglesChainSaver(GeometryCodingParams const & params)
{
  // Assigned in the body on purpose: the members' declaration order is not visible from here.
  m_base = pts::GetBasePoint(params);
  m_max = pts::GetMaxPoint(params);
}
|
||||
|
||||
// Serializes one chain of triangles into a new buffer appended to m_buffers.
// |arr| is the first triangle, |edges| describes how the following triangles are attached.
// Output: two plain point deltas, then one value per triangle: (point delta << 2) | 2 bits
// telling which children (side 0 and/or side 1) follow.
// NOTE(review): |edges| is assumed to be non-empty (edges.front() is taken unconditionally).
void TrianglesChainSaver::operator()(TPoint arr[3], std::vector<TEdge> edges)
{
  m_buffers.push_back(TBuffer());
  MemWriter<TBuffer> writer(m_buffers.back());

  WriteVarUint(writer, coding::EncodePointDeltaAsUint(arr[0], m_base));
  WriteVarUint(writer, coding::EncodePointDeltaAsUint(arr[1], arr[0]));

  // The chain starts from the first edge as passed in, i.e. before sorting.
  TEdge curr = edges.front();
  curr.m_delta = coding::EncodePointDeltaAsUint(arr[2], arr[1]);

  // Sorted by the start node, so the children of a node form a contiguous range.
  std::sort(edges.begin(), edges.end(), edge_less_p0());

  uint64_t const one = 1;
  // Second children postponed until the current branch is finished.
  std::stack<TEdge> st;
  for (;;)
  {
    // Two low bits are reserved for the tree structure.
    CHECK_EQUAL(curr.m_delta >> 62, 0, ());
    uint64_t delta = curr.m_delta << 2;

    // Look for the edges starting at the end node of the current edge.
    int const nextNode = curr.m_p[1];
    auto const i = std::lower_bound(edges.begin(), edges.end(), nextNode, edge_less_p0());
    bool const found = (i != edges.end() && i->m_p[0] == nextNode);
    if (found)
    {
      ASSERT_NOT_EQUAL(i->m_side, -1, ());

      // First child.
      delta |= (one << i->m_side);

      auto const j = i + 1;
      if (j != edges.end() && j->m_p[0] == nextNode)
      {
        // Second child: with two children the sides must be exactly 0 and 1.
        ASSERT_EQUAL(i->m_side, 0, ());
        ASSERT_EQUAL(j->m_side, 1, ());

        delta |= (one << j->m_side);

        // Will be continued after the first child's branch.
        st.push(*j);
      }

      curr = *i;
    }

    // The value of the element processed on this iteration.
    WriteVarUint(writer, delta);

    if (found)
      continue;

    // End of branch: resume a postponed one or finish.
    if (st.empty())
      break;

    curr = st.top();
    st.pop();
  }
}
|
||||
|
||||
// Decodes a chain written by TrianglesChainSaver into a plain list of triangles (3 points each).
// deltas[0], deltas[1] are plain point deltas; every following value is (point delta << 2) | tree bits.
void DecodeTriangles(coding::InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                     coding::OutPointsT & points)
{
  size_t const count = deltas.size();
  ASSERT_GREATER(count, 2, ());

  m2::PointD const maxPointD(maxPoint);

  // The first triangle.
  points.push_back(coding::DecodePointDeltaFromUint(deltas[0], basePoint));
  points.push_back(coding::DecodePointDeltaFromUint(deltas[1], points.back()));
  points.push_back(coding::DecodePointDeltaFromUint(deltas[2] >> 2, points.back()));

  // Indices (of the last point) of triangles whose second child is still to be decoded.
  std::stack<size_t> st;

  // Index of the last point of the current triangle and its tree bits.
  size_t ind = 2;
  uint8_t treeBits = deltas[2] & 3;

  size_t i = 3;
  while (i < count)
  {
    bool const hasFirst = (treeBits & 1) != 0;
    bool const hasSecond = (treeBits & 2) != 0;

    if (!hasFirst && !hasSecond)
    {
      // End of branch: return to a postponed triangle and take its second child.
      ASSERT(!st.empty(), ());
      ind = st.top();
      st.pop();
      treeBits = 2;
      continue;
    }

    // |edge0|, |edge1| - the common edge; |opposite| - the third point of the parent triangle,
    // used for the prediction only.
    size_t edge0;
    size_t edge1;
    size_t opposite;
    if (hasFirst)
    {
      // Common edge is 1->2.
      edge0 = ind;
      edge1 = ind - 1;
      opposite = ind - 2;

      // The second child is processed later.
      if (hasSecond)
        st.push(ind);
    }
    else
    {
      // Common edge is 2->0.
      edge0 = ind - 2;
      edge1 = ind;
      opposite = ind - 1;
    }

    // Emit the new triangle: the common edge and the decoded point.
    points.push_back(points[edge0]);
    points.push_back(points[edge1]);
    points.push_back(coding::DecodePointDeltaFromUint(
        deltas[i] >> 2,
        coding::PredictPointInTriangle(maxPointD, points[edge0], points[edge1], points[opposite])));

    // Move on to the just decoded triangle.
    treeBits = deltas[i] & 3;
    ind = points.size() - 1;
    ++i;
  }

  ASSERT(treeBits == 0 && st.empty(), ());
}
|
||||
} // namespace serial
|
||||
356
libs/coding/geometry_coding.hpp
Normal file
356
libs/coding/geometry_coding.hpp
Normal file
|
|
@ -0,0 +1,356 @@
|
|||
#pragma once
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
|
||||
#include "coding/point_coding.hpp"
|
||||
#include "coding/tesselator_decl.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/array_adapters.hpp"
|
||||
#include "base/assert.hpp"
|
||||
#include "base/buffer_vector.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
using InPointsT = array_read<m2::PointU>;
|
||||
using InDeltasT = array_read<uint64_t>;
|
||||
using OutPointsT = array_write<m2::PointU>;
|
||||
using OutDeltasT = array_write<uint64_t>;
|
||||
|
||||
// Stores the difference of two points to a single unsigned 64-bit integer.
|
||||
// It is not recommended to use this function: consider EncodePointDelta instead.
|
||||
uint64_t EncodePointDeltaAsUint(m2::PointU const & actual, m2::PointU const & prediction);
|
||||
|
||||
m2::PointU DecodePointDeltaFromUint(uint64_t delta, m2::PointU const & prediction);
|
||||
|
||||
// Writes the difference of two 2d vectors to sink.
|
||||
template <typename Sink>
|
||||
void EncodePointDelta(Sink & sink, m2::PointU const & curr, m2::PointU const & next)
|
||||
{
|
||||
auto const dx = base::asserted_cast<int32_t>(next.x) - base::asserted_cast<int32_t>(curr.x);
|
||||
auto const dy = base::asserted_cast<int32_t>(next.y) - base::asserted_cast<int32_t>(curr.y);
|
||||
WriteVarInt(sink, dx);
|
||||
WriteVarInt(sink, dy);
|
||||
}
|
||||
|
||||
// Reads the encoded difference from |source| and returns the
|
||||
// point equal to |base| + difference.
|
||||
template <typename Source>
|
||||
m2::PointU DecodePointDelta(Source & source, m2::PointU const & base)
|
||||
{
|
||||
auto const dx = ReadVarInt<int32_t>(source);
|
||||
auto const dy = ReadVarInt<int32_t>(source);
|
||||
ASSERT(int(base.x) + dx >= 0 && int(base.y) + dy >= 0, (base, dx, dy));
|
||||
return m2::PointU(base.x + dx, base.y + dy);
|
||||
}
|
||||
|
||||
/// Predict next point for polyline with given previous points (p1, p2).
|
||||
m2::PointU PredictPointInPolyline(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2);
|
||||
|
||||
/// Predict next point for polyline with given previous points (p1, p2, p3).
|
||||
m2::PointU PredictPointInPolyline(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2,
|
||||
m2::PointU const & p3);
|
||||
|
||||
/// Predict point for neighbour triangle with given
|
||||
/// previous triangle (p1, p2, p3) and common edge (p1, p2).
|
||||
m2::PointU PredictPointInTriangle(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2,
|
||||
m2::PointU const & p3);
|
||||
|
||||
void EncodePolylinePrev1(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutDeltasT & deltas);
|
||||
|
||||
void DecodePolylinePrev1(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutPointsT & points);
|
||||
|
||||
void EncodePolylinePrev2(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutDeltasT & deltas);
|
||||
|
||||
void DecodePolylinePrev2(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutPointsT & points);
|
||||
|
||||
void EncodePolylinePrev3(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutDeltasT & deltas);
|
||||
|
||||
void DecodePolylinePrev3(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutPointsT & points);
|
||||
|
||||
void EncodePolyline(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutDeltasT & deltas);
|
||||
|
||||
void DecodePolyline(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutPointsT & points);
|
||||
|
||||
void EncodeTriangleStrip(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutDeltasT & deltas);
|
||||
|
||||
void DecodeTriangleStrip(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
OutPointsT & points);
|
||||
} // namespace coding
|
||||
|
||||
namespace serial
|
||||
{
|
||||
class GeometryCodingParams
|
||||
{
|
||||
public:
|
||||
GeometryCodingParams();
|
||||
GeometryCodingParams(uint8_t coordBits, m2::PointD const & pt);
|
||||
GeometryCodingParams(uint8_t coordBits, uint64_t basePointUint64);
|
||||
|
||||
m2::PointU GetBasePoint() const { return m_BasePoint; }
|
||||
uint64_t GetBasePointUint64() const { return m_BasePointUint64; }
|
||||
int64_t GetBasePointInt64() const { return static_cast<int64_t>(m_BasePointUint64); }
|
||||
|
||||
void SetBasePoint(m2::PointD const & pt);
|
||||
|
||||
uint8_t GetCoordBits() const { return m_CoordBits; }
|
||||
|
||||
template <typename WriterT>
|
||||
void Save(WriterT & writer) const
|
||||
{
|
||||
WriteVarUint(writer, GetCoordBits());
|
||||
WriteVarUint(writer, m_BasePointUint64);
|
||||
}
|
||||
|
||||
template <typename SourceT>
|
||||
void Load(SourceT & src)
|
||||
{
|
||||
uint32_t const coordBits = ReadVarUint<uint32_t>(src);
|
||||
ASSERT_LESS(coordBits, 32, ());
|
||||
*this = GeometryCodingParams(coordBits, ReadVarUint<uint64_t>(src));
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t m_BasePointUint64;
|
||||
m2::PointU m_BasePoint;
|
||||
uint8_t m_CoordBits;
|
||||
};
|
||||
|
||||
namespace pts
|
||||
{
|
||||
using PointsU = buffer_vector<m2::PointU, 32>;
|
||||
|
||||
m2::PointU D2U(m2::PointD const & p, uint32_t coordBits);
|
||||
|
||||
m2::PointD U2D(m2::PointU const & p, uint32_t coordBits);
|
||||
|
||||
m2::PointU GetMaxPoint(GeometryCodingParams const & params);
|
||||
|
||||
m2::PointU GetBasePoint(GeometryCodingParams const & params);
|
||||
} // namespace pts
|
||||
|
||||
/// @name Encode and Decode function types.
|
||||
typedef void (*EncodeFunT)(coding::InPointsT const &, m2::PointU const &, m2::PointU const &, coding::OutDeltasT &);
|
||||
typedef void (*DecodeFunT)(coding::InDeltasT const &, m2::PointU const &, m2::PointU const &, coding::OutPointsT &);
|
||||
|
||||
using DeltasT = buffer_vector<uint64_t, 32>;
|
||||
using OutPointsT = buffer_vector<m2::PointD, 32>;
|
||||
|
||||
void Encode(EncodeFunT fn, std::vector<m2::PointD> const & points, GeometryCodingParams const & params,
|
||||
DeltasT & deltas);
|
||||
|
||||
/// @name Overloads for different out container types.
|
||||
void Decode(DecodeFunT fn, DeltasT const & deltas, GeometryCodingParams const & params, OutPointsT & points,
|
||||
size_t reserveF = 1);
|
||||
void Decode(DecodeFunT fn, DeltasT const & deltas, GeometryCodingParams const & params,
|
||||
std::vector<m2::PointD> & points, size_t reserveF = 1);
|
||||
|
||||
template <class TDecodeFun, class TOutPoints>
|
||||
void DecodeImpl(TDecodeFun fn, DeltasT const & deltas, GeometryCodingParams const & params, TOutPoints & points,
|
||||
size_t reserveF)
|
||||
{
|
||||
size_t const count = deltas.size() * reserveF;
|
||||
|
||||
pts::PointsU upoints;
|
||||
upoints.resize(count);
|
||||
|
||||
coding::OutPointsT adapt(upoints);
|
||||
(*fn)(make_read_adapter(deltas), pts::GetBasePoint(params), pts::GetMaxPoint(params), adapt);
|
||||
|
||||
if (points.size() < 2)
|
||||
{
|
||||
// Do not call reserve when loading triangles - they are accumulated to one vector.
|
||||
points.reserve(count);
|
||||
}
|
||||
|
||||
std::transform(upoints.begin(), upoints.begin() + adapt.size(), std::back_inserter(points),
|
||||
std::bind(&pts::U2D, std::placeholders::_1, params.GetCoordBits()));
|
||||
}
|
||||
|
||||
template <class TSink>
|
||||
void SavePoint(TSink & sink, m2::PointD const & pt, GeometryCodingParams const & cp)
|
||||
{
|
||||
WriteVarUint(sink, coding::EncodePointDeltaAsUint(PointDToPointU(pt, cp.GetCoordBits()), cp.GetBasePoint()));
|
||||
}
|
||||
|
||||
template <class TSource>
|
||||
m2::PointD LoadPoint(TSource & src, GeometryCodingParams const & cp)
|
||||
{
|
||||
m2::PointD const pt = PointUToPointD(coding::DecodePointDeltaFromUint(ReadVarUint<uint64_t>(src), cp.GetBasePoint()),
|
||||
cp.GetCoordBits());
|
||||
return pt;
|
||||
}
|
||||
|
||||
template <class TSink>
|
||||
void SaveInner(EncodeFunT fn, std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
|
||||
{
|
||||
DeltasT deltas;
|
||||
Encode(fn, points, params, deltas);
|
||||
WriteVarUintArray(deltas, sink);
|
||||
}
|
||||
|
||||
/// Writes |buffer| to |sink| prefixed with its size in bytes (varuint, 32 bits).
template <class TSink>
void WriteBufferToSink(std::vector<char> const & buffer, TSink & sink)
{
  uint32_t const count = static_cast<uint32_t>(buffer.size());
  WriteVarUint(sink, count);
  // data() is valid for an empty vector as well, unlike &buffer[0].
  sink.Write(buffer.data(), count);
}
|
||||
|
||||
template <class TSink>
|
||||
void SaveOuter(EncodeFunT fn, std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
|
||||
{
|
||||
DeltasT deltas;
|
||||
Encode(fn, points, params, deltas);
|
||||
|
||||
std::vector<char> buffer;
|
||||
MemWriter<std::vector<char>> writer(buffer);
|
||||
WriteVarUintArray(deltas, writer);
|
||||
|
||||
WriteBufferToSink(buffer, sink);
|
||||
}
|
||||
|
||||
void const * LoadInner(DecodeFunT fn, void const * pBeg, size_t count, GeometryCodingParams const & params,
|
||||
OutPointsT & points);
|
||||
|
||||
template <class TSource, class TPoints>
|
||||
void LoadOuter(DecodeFunT fn, TSource & src, GeometryCodingParams const & params, TPoints & points, size_t reserveF = 1)
|
||||
{
|
||||
uint32_t const count = ReadVarUint<uint32_t>(src);
|
||||
std::vector<char> buffer(count);
|
||||
char * p = &buffer[0];
|
||||
src.Read(p, count);
|
||||
|
||||
DeltasT deltas;
|
||||
deltas.reserve(count / 2);
|
||||
ReadVarUint64Array(p, p + count, base::MakeBackInsertFunctor(deltas));
|
||||
|
||||
Decode(fn, deltas, params, points, reserveF);
|
||||
}
|
||||
|
||||
/// @name Paths.
|
||||
template <class TSink>
|
||||
void SaveInnerPath(std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
|
||||
{
|
||||
SaveInner(&coding::EncodePolyline, points, params, sink);
|
||||
}
|
||||
|
||||
template <class TSink>
|
||||
void SaveOuterPath(std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
|
||||
{
|
||||
SaveOuter(&coding::EncodePolyline, points, params, sink);
|
||||
}
|
||||
|
||||
inline void const * LoadInnerPath(void const * pBeg, size_t count, GeometryCodingParams const & params,
|
||||
OutPointsT & points)
|
||||
{
|
||||
return LoadInner(&coding::DecodePolyline, pBeg, count, params, points);
|
||||
}
|
||||
|
||||
template <class TSource, class TPoints>
|
||||
void LoadOuterPath(TSource & src, GeometryCodingParams const & params, TPoints & points)
|
||||
{
|
||||
LoadOuter(&coding::DecodePolyline, src, params, points);
|
||||
}
|
||||
|
||||
/// @name Triangles.
|
||||
template <class TSink>
|
||||
void SaveInnerTriangles(std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
|
||||
{
|
||||
SaveInner(&coding::EncodeTriangleStrip, points, params, sink);
|
||||
}
|
||||
|
||||
inline void StripToTriangles(size_t count, OutPointsT const & strip, OutPointsT & triangles)
|
||||
{
|
||||
CHECK_GREATER_OR_EQUAL(count, 2, ());
|
||||
triangles.clear();
|
||||
triangles.reserve((count - 2) * 3);
|
||||
for (size_t i = 2; i < count; ++i)
|
||||
{
|
||||
triangles.push_back(strip[i - 2]);
|
||||
triangles.push_back(strip[i - 1]);
|
||||
triangles.push_back(strip[i]);
|
||||
}
|
||||
}
|
||||
|
||||
inline void const * LoadInnerTriangles(void const * pBeg, size_t count, GeometryCodingParams const & params,
|
||||
OutPointsT & triangles)
|
||||
{
|
||||
CHECK_GREATER_OR_EQUAL(count, 2, ());
|
||||
OutPointsT points;
|
||||
void const * res = LoadInner(&coding::DecodeTriangleStrip, pBeg, count, params, points);
|
||||
|
||||
StripToTriangles(count, points, triangles);
|
||||
return res;
|
||||
}
|
||||
|
||||
void DecodeTriangles(coding::InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
|
||||
coding::OutPointsT & triangles);
|
||||
|
||||
template <class TSource>
|
||||
void LoadOuterTriangles(TSource & src, GeometryCodingParams const & params, OutPointsT & triangles)
|
||||
{
|
||||
uint32_t const count = ReadVarUint<uint32_t>(src);
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i)
|
||||
LoadOuter(&DecodeTriangles, src, params, triangles, 3);
|
||||
}
|
||||
|
||||
class TrianglesChainSaver
|
||||
{
|
||||
using TPoint = m2::PointU;
|
||||
using TEdge = tesselator::Edge;
|
||||
using TBuffer = std::vector<char>;
|
||||
|
||||
TPoint m_base;
|
||||
TPoint m_max;
|
||||
|
||||
std::list<TBuffer> m_buffers;
|
||||
|
||||
public:
|
||||
explicit TrianglesChainSaver(GeometryCodingParams const & params);
|
||||
|
||||
TPoint GetBasePoint() const { return m_base; }
|
||||
TPoint GetMaxPoint() const { return m_max; }
|
||||
|
||||
void operator()(TPoint arr[3], std::vector<TEdge> edges);
|
||||
|
||||
size_t GetBufferSize() const
|
||||
{
|
||||
size_t sz = 0;
|
||||
for (auto const & i : m_buffers)
|
||||
sz += i.size();
|
||||
return sz;
|
||||
}
|
||||
|
||||
template <class TSink>
|
||||
void Save(TSink & sink)
|
||||
{
|
||||
// Not necessary assumption that 3-bytes varuint
|
||||
// is enough for triangle chains count.
|
||||
size_t const count = m_buffers.size();
|
||||
CHECK_LESS_OR_EQUAL(count, 0x1FFFFF, ());
|
||||
|
||||
WriteVarUint(sink, static_cast<uint32_t>(count));
|
||||
|
||||
std::for_each(m_buffers.begin(), m_buffers.end(),
|
||||
std::bind(&WriteBufferToSink<TSink>, std::placeholders::_1, std::ref(sink)));
|
||||
}
|
||||
};
|
||||
} // namespace serial
|
||||
48
libs/coding/hex.cpp
Normal file
48
libs/coding/hex.cpp
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
#include "coding/hex.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
namespace impl
|
||||
{
|
||||
// Upper-case hexadecimal digits indexed by nibble value.
static char constexpr kToHexTable[] = "0123456789ABCDEF";

/// Converts |size| bytes at |src| into 2 * |size| upper-case hex characters at |dst|.
/// The high nibble of each byte is written first. No terminator is appended.
/// @param src   Input bytes.
/// @param size  Number of input bytes.
/// @param dst   Output buffer with room for at least 2 * |size| bytes.
void ToHexRaw(void const * src, size_t size, void * dst)
{
  auto const * in = static_cast<uint8_t const *>(src);
  auto * hex = static_cast<uint8_t *>(dst);

  for (size_t i = 0; i < size; ++i)
  {
    hex[2 * i] = kToHexTable[in[i] >> 4];
    hex[2 * i + 1] = kToHexTable[in[i] & 0xF];
  }
}
|
||||
|
||||
uint8_t HexDigitToRaw(uint8_t const digit)
|
||||
{
|
||||
if (digit >= '0' && digit <= '9')
|
||||
return (digit - '0');
|
||||
else if (digit >= 'A' && digit <= 'F')
|
||||
return (digit - 'A' + 10);
|
||||
else if (digit >= 'a' && digit <= 'f')
|
||||
return (digit - 'a' + 10);
|
||||
ASSERT(false, (digit));
|
||||
return 0;
|
||||
}
|
||||
|
||||
void FromHexRaw(void const * src, size_t size, void * dst)
|
||||
{
|
||||
uint8_t const * ptr = static_cast<uint8_t const *>(src);
|
||||
uint8_t const * end = ptr + size;
|
||||
uint8_t * out = static_cast<uint8_t *>(dst);
|
||||
|
||||
while (ptr < end)
|
||||
{
|
||||
*out = HexDigitToRaw(*ptr++) << 4;
|
||||
*out |= HexDigitToRaw(*ptr++);
|
||||
++out;
|
||||
}
|
||||
}
|
||||
} // namespace impl
|
||||
106
libs/coding/hex.hpp
Normal file
106
libs/coding/hex.hpp
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
#pragma once
|
||||
|
||||
#include "base/base.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
namespace impl
|
||||
{
|
||||
void ToHexRaw(void const * src, size_t size, void * dst);
|
||||
void FromHexRaw(void const * src, size_t size, void * dst);
|
||||
} // namespace impl
|
||||
|
||||
inline std::string ToHex(void const * ptr, size_t size)
|
||||
{
|
||||
std::string result;
|
||||
if (size == 0)
|
||||
return result;
|
||||
|
||||
result.resize(size * 2);
|
||||
::impl::ToHexRaw(ptr, size, &result[0]);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns the hexadecimal text of a container of one-byte elements.
/// The bytes are read starting at the address of the first element; the element count
/// is taken as end() - begin(). An empty container yields an empty string.
template <typename ContainerT>
inline std::string ToHex(ContainerT const & container)
{
  static_assert(sizeof(*container.begin()) == 1, "");

  if (!container.empty())
  {
    auto const first = container.begin();
    return ToHex(&*first, container.end() - first);
  }

  return {};
}
|
||||
|
||||
/// Conversion with specializations to avoid warnings
|
||||
/// @{
|
||||
/// Returns the hexadecimal text of an integer, most significant byte first.
/// The result always has 2 * sizeof(IntT) characters.
template <typename IntT>
inline std::string NumToHex(IntT n)
{
  static_assert(std::is_integral<IntT>::value, "");

  size_t constexpr kTopByteShift = (sizeof(n) - 1) * 8;
  uint8_t bytes[sizeof(n)];

  // Peel off the current top byte, then shift the next one into its place.
  for (uint8_t & byte : bytes)
  {
    byte = (n >> kTopByteShift);
    n <<= 8;
  }

  return ToHex(bytes, sizeof(bytes));
}
|
||||
|
||||
template <>
|
||||
inline std::string NumToHex<int8_t>(int8_t c)
|
||||
{
|
||||
return ToHex(&c, sizeof(c));
|
||||
}
|
||||
|
||||
template <>
|
||||
inline std::string NumToHex<uint8_t>(uint8_t c)
|
||||
{
|
||||
return ToHex(&c, sizeof(c));
|
||||
}
|
||||
|
||||
template <>
|
||||
inline std::string NumToHex<char>(char c)
|
||||
{
|
||||
return ToHex(&c, sizeof(c));
|
||||
}
|
||||
/// @}
|
||||
|
||||
inline std::string FromHex(std::string_view s)
|
||||
{
|
||||
std::string result;
|
||||
result.resize(s.size() / 2);
|
||||
::impl::FromHexRaw(s.data(), s.size(), &result[0]);
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns the base-4 representation of a byte as four characters '0'..'3',
/// most significant digit first.
inline std::string ByteToQuat(uint8_t n)
{
  std::string quat(4, '0');
  // Fill from the least significant digit backwards.
  for (size_t pos = 4; pos-- > 0;)
  {
    quat[pos] = static_cast<char>('0' + (n & 0x3));
    n >>= 2;
  }
  return quat;
}
|
||||
|
||||
template <typename IntT>
|
||||
inline std::string NumToQuat(IntT n)
|
||||
{
|
||||
std::string result;
|
||||
for (size_t i = 0; i < sizeof(n); ++i)
|
||||
{
|
||||
uint8_t ub = n >> (sizeof(n) * 8 - 8);
|
||||
result += ByteToQuat(ub);
|
||||
n <<= 8;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
112
libs/coding/huffman.cpp
Normal file
112
libs/coding/huffman.cpp
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
#include "coding/huffman.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
// Frees every node of the tree rooted at m_root (DeleteHuffmanTree accepts a null root).
HuffmanCoder::~HuffmanCoder()
|
||||
{
|
||||
DeleteHuffmanTree(m_root);
|
||||
}
|
||||
|
||||
bool HuffmanCoder::Encode(uint32_t symbol, Code & code) const
|
||||
{
|
||||
auto it = m_encoderTable.find(symbol);
|
||||
if (it == m_encoderTable.end())
|
||||
return false;
|
||||
code = it->second;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Looks up the symbol assigned to |code|.
// Returns false and leaves |symbol| untouched when the code is not in the decoder table.
bool HuffmanCoder::Decode(Code const & code, uint32_t & symbol) const
{
  auto const entry = m_decoderTable.find(code);
  bool const found = entry != m_decoderTable.end();
  if (found)
    symbol = entry->second;
  return found;
}
|
||||
|
||||
// Walks the subtree rooted at |root| and records a (symbol, code) pair for every leaf
// in both lookup tables.
// |path| holds the branch choices made so far: the bit at position d is set when the
// right child was taken at depth d.
void HuffmanCoder::BuildTables(Node * root, uint32_t path)
{
  if (root == nullptr)
    return;

  if (!root->isLeaf)
  {
    uint32_t const rightBranchBit = static_cast<uint32_t>(1) << root->depth;
    BuildTables(root->l, path);
    BuildTables(root->r, path + rightBranchBit);
    return;
  }

  // The code length of a leaf equals its depth in the tree.
  Code const leafCode(path, root->depth);
  m_encoderTable[root->symbol] = leafCode;
  m_decoderTable[leafCode] = root->symbol;
}
|
||||
|
||||
void HuffmanCoder::Clear()
|
||||
{
|
||||
DeleteHuffmanTree(m_root);
|
||||
m_root = nullptr;
|
||||
m_encoderTable.clear();
|
||||
m_decoderTable.clear();
|
||||
}
|
||||
|
||||
// Recursively deletes the subtree rooted at |root|, children before the node itself.
// A null |root| is a no-op.
void HuffmanCoder::DeleteHuffmanTree(Node * root)
{
  if (root != nullptr)
  {
    DeleteHuffmanTree(root->l);
    DeleteHuffmanTree(root->r);
    delete root;
  }
}
|
||||
|
||||
// Builds the Huffman tree for |freqs| and stores its root in m_root.
// An empty frequency table results in a null root.
void HuffmanCoder::BuildHuffmanTree(Freqs const & freqs)
{
  std::priority_queue<Node *, std::vector<Node *>, NodeComparator> heap;
  for (auto const & symbolAndFreq : freqs.GetTable())
    heap.push(new Node(symbolAndFreq.first, symbolAndFreq.second, true /* isLeaf */));

  if (heap.empty())
  {
    m_root = nullptr;
    return;
  }

  auto const extractTop = [&heap]() -> Node *
  {
    Node * const top = heap.top();
    heap.pop();
    return top;
  };

  // Repeatedly merge the two nodes at the top of the heap until a single root remains.
  while (heap.size() > 1)
  {
    Node * const first = extractTop();
    Node * const second = extractTop();

    // The node with the smaller symbol becomes the left child and lends its symbol to
    // the parent, which makes the resulting encoding more predictable.
    Node * const left = first->symbol > second->symbol ? second : first;
    Node * const right = left == first ? second : first;

    Node * const parent = new Node(left->symbol, left->freq + right->freq, false /* isLeaf */);
    parent->l = left;
    parent->r = right;
    heap.push(parent);
  }

  m_root = extractTop();

  SetDepths(m_root, 0 /* depth */);
}
|
||||
|
||||
// Assigns |depth| to |root| and depth + 1, depth + 2, ... to the levels below it.
// A null |root| is a no-op.
void HuffmanCoder::SetDepths(Node * root, uint32_t depth)
{
  // Code bits are stored in a 32-bit field, so deeper trees cannot be represented.
  // Such trees are rejected by the CHECK below; no attempt is made to shrink them.
  uint32_t constexpr kMaxDepth = 32;

  if (root == nullptr)
    return;

  CHECK_LESS_OR_EQUAL(depth, kMaxDepth, ());
  root->depth = depth;

  uint32_t const childDepth = depth + 1;
  SetDepths(root->l, childDepth);
  SetDepths(root->r, childDepth);
}
|
||||
} // namespace coding
|
||||
312
libs/coding/huffman.hpp
Normal file
312
libs/coding/huffman.hpp
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/bit_streams.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/checked_cast.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
class HuffmanCoder
|
||||
{
|
||||
public:
|
||||
class Freqs
|
||||
{
|
||||
public:
|
||||
using Table = std::map<uint32_t, uint32_t>;
|
||||
|
||||
Freqs() = default;
|
||||
|
||||
template <typename... Args>
|
||||
Freqs(Args const &... args)
|
||||
{
|
||||
Add(args...);
|
||||
}
|
||||
|
||||
void Add(strings::UniString const & s) { Add(s.begin(), s.end()); }
|
||||
|
||||
void Add(std::string const & s) { Add(s.begin(), s.end()); }
|
||||
|
||||
template <typename T>
|
||||
void Add(T const * begin, T const * const end)
|
||||
{
|
||||
static_assert(std::is_integral<T>::value, "");
|
||||
AddImpl(begin, end);
|
||||
}
|
||||
|
||||
template <typename It>
|
||||
void Add(It begin, It const end)
|
||||
{
|
||||
static_assert(std::is_integral<typename It::value_type>::value, "");
|
||||
AddImpl(begin, end);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Add(std::vector<T> const & v)
|
||||
{
|
||||
for (auto const & e : v)
|
||||
Add(std::begin(e), std::end(e));
|
||||
}
|
||||
|
||||
Table const & GetTable() const { return m_table; }
|
||||
|
||||
private:
|
||||
template <typename It>
|
||||
void AddImpl(It begin, It const end)
|
||||
{
|
||||
static_assert(sizeof(*begin) <= 4, "");
|
||||
for (; begin != end; ++begin)
|
||||
++m_table[static_cast<uint32_t>(*begin)];
|
||||
}
|
||||
|
||||
Table m_table;
|
||||
};
|
||||
|
||||
// A Code encodes a path to a leaf. It is read starting from
|
||||
// the least significant bit.
|
||||
struct Code
|
||||
{
|
||||
uint32_t bits;
|
||||
size_t len;
|
||||
|
||||
Code() : bits(0), len(0) {}
|
||||
Code(uint32_t bits, size_t len) : bits(bits), len(len) {}
|
||||
|
||||
bool operator<(Code const & o) const
|
||||
{
|
||||
if (bits != o.bits)
|
||||
return bits < o.bits;
|
||||
return len < o.len;
|
||||
}
|
||||
};
|
||||
|
||||
HuffmanCoder() : m_root(nullptr) {}
|
||||
~HuffmanCoder();
|
||||
|
||||
// Internally builds a Huffman tree and makes
|
||||
// the EncodeAndWrite and ReadAndDecode methods available.
|
||||
template <typename... Args>
|
||||
void Init(Args const &... args)
|
||||
{
|
||||
Clear();
|
||||
BuildHuffmanTree(Freqs(args...));
|
||||
BuildTables(m_root, 0);
|
||||
}
|
||||
|
||||
void Clear();
|
||||
|
||||
// One way to store the encoding would be
|
||||
// -- the succinct representation of the topology of Huffman tree;
|
||||
// -- the list of symbols that are stored in the leaves, as varuints in delta encoding.
|
||||
// This would probably be an overkill.
|
||||
template <typename TWriter>
|
||||
void WriteEncoding(TWriter & writer)
|
||||
{
|
||||
// @todo Do not waste space, use BitWriter.
|
||||
WriteVarUint(writer, m_decoderTable.size());
|
||||
for (auto const & kv : m_decoderTable)
|
||||
{
|
||||
WriteVarUint(writer, kv.first.bits);
|
||||
WriteVarUint(writer, kv.first.len);
|
||||
WriteVarUint(writer, kv.second);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TSource>
|
||||
void ReadEncoding(TSource & src)
|
||||
{
|
||||
DeleteHuffmanTree(m_root);
|
||||
m_root = new Node(0 /* symbol */, 0 /* freq */, false /* isLeaf */);
|
||||
|
||||
m_encoderTable.clear();
|
||||
m_decoderTable.clear();
|
||||
|
||||
size_t sz = static_cast<size_t>(ReadVarUint<uint32_t, TSource>(src));
|
||||
for (size_t i = 0; i < sz; ++i)
|
||||
{
|
||||
uint32_t bits = ReadVarUint<uint32_t, TSource>(src);
|
||||
uint32_t len = ReadVarUint<uint32_t, TSource>(src);
|
||||
uint32_t symbol = ReadVarUint<uint32_t, TSource>(src);
|
||||
Code code(bits, len);
|
||||
|
||||
m_encoderTable[symbol] = code;
|
||||
m_decoderTable[code] = symbol;
|
||||
|
||||
Node * cur = m_root;
|
||||
for (size_t j = 0; j < len; ++j)
|
||||
{
|
||||
if (((bits >> j) & 1) == 0)
|
||||
{
|
||||
if (!cur->l)
|
||||
cur->l = new Node(0 /* symbol */, 0 /* freq */, false /* isLeaf */);
|
||||
cur = cur->l;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!cur->r)
|
||||
cur->r = new Node(0 /* symbol */, 0 /* freq */, false /* isLeaf */);
|
||||
cur = cur->r;
|
||||
}
|
||||
cur->depth = j + 1;
|
||||
}
|
||||
cur->isLeaf = true;
|
||||
cur->symbol = symbol;
|
||||
}
|
||||
}
|
||||
|
||||
bool Encode(uint32_t symbol, Code & code) const;
|
||||
bool Decode(Code const & code, uint32_t & symbol) const;
|
||||
|
||||
template <typename TWriter, typename T>
|
||||
uint32_t EncodeAndWrite(TWriter & writer, T const * begin, T const * end) const
|
||||
{
|
||||
static_assert(std::is_integral<T>::value, "");
|
||||
return EncodeAndWriteImpl(writer, begin, end);
|
||||
}
|
||||
|
||||
template <typename TWriter, typename It>
|
||||
uint32_t EncodeAndWrite(TWriter & writer, It begin, It end) const
|
||||
{
|
||||
static_assert(std::is_integral<typename It::value_type>::value, "");
|
||||
return EncodeAndWriteImpl(writer, begin, end);
|
||||
}
|
||||
|
||||
template <typename TWriter>
|
||||
uint32_t EncodeAndWrite(TWriter & writer, std::string const & s) const
|
||||
{
|
||||
return EncodeAndWrite(writer, s.begin(), s.end());
|
||||
}
|
||||
|
||||
// Returns the number of bits written AFTER the size, i.e. the number
|
||||
// of bits that the encoded string consists of.
|
||||
template <typename TWriter>
|
||||
uint32_t EncodeAndWrite(TWriter & writer, strings::UniString const & s) const
|
||||
{
|
||||
return EncodeAndWrite(writer, s.begin(), s.end());
|
||||
}
|
||||
|
||||
template <typename TSource, typename OutIt>
|
||||
OutIt ReadAndDecode(TSource & src, OutIt out) const
|
||||
{
|
||||
BitReader<TSource> bitReader(src);
|
||||
size_t sz = static_cast<size_t>(ReadVarUint<uint32_t, TSource>(src));
|
||||
for (size_t i = 0; i < sz; ++i)
|
||||
*out++ = ReadAndDecode(bitReader);
|
||||
return out;
|
||||
}
|
||||
|
||||
template <typename TSource>
|
||||
strings::UniString ReadAndDecode(TSource & src) const
|
||||
{
|
||||
strings::UniString result;
|
||||
ReadAndDecode(src, std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
struct Node
|
||||
{
|
||||
Node *l, *r;
|
||||
uint32_t symbol;
|
||||
uint32_t freq;
|
||||
size_t depth;
|
||||
bool isLeaf;
|
||||
|
||||
Node(uint32_t symbol, uint32_t freq, bool isLeaf)
|
||||
: l(nullptr)
|
||||
, r(nullptr)
|
||||
, symbol(symbol)
|
||||
, freq(freq)
|
||||
, depth(0)
|
||||
, isLeaf(isLeaf)
|
||||
{}
|
||||
};
|
||||
|
||||
struct NodeComparator
|
||||
{
|
||||
bool operator()(Node const * const a, Node const * const b) const
|
||||
{
|
||||
if (a->freq != b->freq)
|
||||
return a->freq > b->freq;
|
||||
return a->symbol > b->symbol;
|
||||
}
|
||||
};
|
||||
|
||||
// No need to clump the interface: keep private the methods
|
||||
// that encode one symbol only.
|
||||
template <typename TWriter>
|
||||
size_t EncodeAndWrite(BitWriter<TWriter> & bitWriter, uint32_t symbol) const
|
||||
{
|
||||
Code code;
|
||||
CHECK(Encode(symbol, code), ());
|
||||
size_t fullBytes = code.len / CHAR_BIT;
|
||||
size_t rem = code.len % CHAR_BIT;
|
||||
for (size_t i = 0; i < fullBytes; ++i)
|
||||
{
|
||||
bitWriter.Write(code.bits & 0xFF, CHAR_BIT);
|
||||
code.bits >>= CHAR_BIT;
|
||||
}
|
||||
bitWriter.Write(code.bits, rem);
|
||||
return code.len;
|
||||
}
|
||||
|
||||
template <typename TWriter, typename It>
|
||||
uint32_t EncodeAndWriteImpl(TWriter & writer, It begin, It end) const
|
||||
{
|
||||
static_assert(sizeof(*begin) <= 4, "");
|
||||
|
||||
size_t const d = base::asserted_cast<size_t>(std::distance(begin, end));
|
||||
BitWriter<TWriter> bitWriter(writer);
|
||||
WriteVarUint(writer, d);
|
||||
uint32_t sz = 0;
|
||||
for (; begin != end; ++begin)
|
||||
sz += EncodeAndWrite(bitWriter, static_cast<uint32_t>(*begin));
|
||||
return sz;
|
||||
}
|
||||
|
||||
template <typename TSource>
|
||||
uint32_t ReadAndDecode(BitReader<TSource> & bitReader) const
|
||||
{
|
||||
Node * cur = m_root;
|
||||
while (cur)
|
||||
{
|
||||
if (cur->isLeaf)
|
||||
return cur->symbol;
|
||||
uint8_t bit = bitReader.Read(1);
|
||||
if (bit == 0)
|
||||
cur = cur->l;
|
||||
else
|
||||
cur = cur->r;
|
||||
}
|
||||
CHECK(false, ("Could not decode a Huffman-encoded symbol."));
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Converts a Huffman tree into the more convenient representation
|
||||
// of encoding and decoding tables.
|
||||
void BuildTables(Node * root, uint32_t path);
|
||||
|
||||
void DeleteHuffmanTree(Node * root);
|
||||
|
||||
void BuildHuffmanTree(Freqs const & freqs);
|
||||
|
||||
// Properly sets the depth field in the subtree rooted at root.
|
||||
// It is easier to do it after the tree is built.
|
||||
void SetDepths(Node * root, uint32_t depth);
|
||||
|
||||
Node * m_root;
|
||||
std::map<Code, uint32_t> m_decoderTable;
|
||||
std::map<uint32_t, Code> m_encoderTable;
|
||||
};
|
||||
} // namespace coding
|
||||
29
libs/coding/internal/file64_api.hpp
Normal file
29
libs/coding/internal/file64_api.hpp
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#include "std/target_os.hpp"
|
||||
|
||||
#if defined(OMIM_OS_WINDOWS_NATIVE)
|
||||
#define fseek64 _fseeki64
|
||||
#define ftell64 _ftelli64
|
||||
|
||||
#elif defined(OMIM_OS_WINDOWS_MINGW)
|
||||
#define fseek64 fseeko64
|
||||
#define ftell64 ftello64
|
||||
|
||||
#else
|
||||
// POSIX standard.
|
||||
#include <sys/types.h>
|
||||
|
||||
// TODO: Always assert for 8 bytes after increasing min Android API to 24+.
|
||||
// See more details here: https://android.googlesource.com/platform/bionic/+/master/docs/32-bit-abi.md
|
||||
#if defined(OMIM_OS_ANDROID) && (defined(__arm__) || defined(__i386__))
|
||||
static_assert(sizeof(off_t) == 4, "32-bit Android NDK < API 24 has only 32-bit file operations support");
|
||||
#else
|
||||
static_assert(sizeof(off_t) == 8, "FileReader and FileWriter require 64-bit file operations");
|
||||
#endif
|
||||
#define fseek64 fseeko
|
||||
#define ftell64 ftello
|
||||
|
||||
#endif
|
||||
|
||||
#include <cstdio>
|
||||
339
libs/coding/internal/file_data.cpp
Normal file
339
libs/coding/internal/file_data.cpp
Normal file
|
|
@ -0,0 +1,339 @@
|
|||
#include "coding/internal/file_data.hpp"
|
||||
|
||||
#include "coding/constants.hpp"
|
||||
#include "coding/internal/file64_api.hpp"
|
||||
#include "coding/reader.hpp" // For Reader exceptions.
|
||||
#include "coding/writer.hpp" // For Writer exceptions.
|
||||
|
||||
#include "base/exception.hpp"
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/target_os.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <cstring>
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
#include <io.h>
|
||||
#else
|
||||
#include <unistd.h> // ftruncate
|
||||
#endif
|
||||
|
||||
namespace base
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
std::ostream & operator<<(std::ostream & stream, FileData::Op op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case FileData::Op::READ: stream << "READ"; break;
|
||||
case FileData::Op::WRITE_TRUNCATE: stream << "WRITE_TRUNCATE"; break;
|
||||
case FileData::Op::WRITE_EXISTING: stream << "WRITE_EXISTING"; break;
|
||||
case FileData::Op::APPEND: stream << "APPEND"; break;
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
// Opens |fileName| with the stdio mode that corresponds to |op|.
// WRITE_EXISTING falls back to creating the file when it cannot be opened for update.
// Throws Reader::OpenException when opening for READ fails and Writer::OpenException
// when opening for any other operation fails.
FileData::FileData(string const & fileName, Op op) : m_FileName(fileName), m_Op(op)
{
  // Indexed by the numeric value of Op, so the order must match the enumeration.
  char const * const modes[] = {"rb", "wb", "r+b", "ab"};

  m_File = fopen(fileName.c_str(), modes[static_cast<int>(op)]);

  // Special case, since "r+b" fails if file doesn't exist.
  if (!m_File && op == Op::WRITE_EXISTING)
    m_File = fopen(fileName.c_str(), "wb");

  if (m_File)
  {
#if defined(_MSC_VER)
    // Move file pointer to the end of the file to make it consistent with other platforms
    if (op == Op::APPEND)
      fseek64(m_File, 0, SEEK_END);
#endif
    return;
  }

  // If we are here, every attempt to open the file has failed.
  if (m_Op == Op::READ)
    MYTHROW(Reader::OpenException, (GetErrorProlog()));
  MYTHROW(Writer::OpenException, (GetErrorProlog()));
}
|
||||
|
||||
// Closes the file if it is open. A failing fclose is logged as a warning, never thrown.
FileData::~FileData()
{
  if (m_File != nullptr && fclose(m_File) != 0)
    LOG(LWARNING, ("Error closing file", GetErrorProlog()));
}
|
||||
|
||||
// Builds the common prefix of error messages: "<file name>; <operation>; <strerror(errno)>".
string FileData::GetErrorProlog() const
{
  std::ostringstream prolog;
  prolog << m_FileName;
  prolog << "; " << m_Op;
  prolog << "; " << strerror(errno);
  return prolog.str();
}
|
||||
|
||||
// Sentinel compared against ftell64() results to detect a failed position query.
static int64_t constexpr INVALID_POS{-1};
|
||||
|
||||
uint64_t FileData::Size() const
|
||||
{
|
||||
int64_t const pos = ftell64(m_File);
|
||||
if (pos == INVALID_POS)
|
||||
MYTHROW(Reader::SizeException, (GetErrorProlog(), pos));
|
||||
|
||||
if (fseek64(m_File, 0, SEEK_END))
|
||||
MYTHROW(Reader::SizeException, (GetErrorProlog()));
|
||||
|
||||
int64_t const size = ftell64(m_File);
|
||||
if (size == INVALID_POS)
|
||||
MYTHROW(Reader::SizeException, (GetErrorProlog(), size));
|
||||
|
||||
if (fseek64(m_File, static_cast<off_t>(pos), SEEK_SET))
|
||||
MYTHROW(Reader::SizeException, (GetErrorProlog(), pos));
|
||||
|
||||
ASSERT_GREATER_OR_EQUAL(size, 0, ());
|
||||
return static_cast<uint64_t>(size);
|
||||
}
|
||||
|
||||
void FileData::Read(uint64_t pos, void * p, size_t size)
|
||||
{
|
||||
if (fseek64(m_File, static_cast<off_t>(pos), SEEK_SET))
|
||||
MYTHROW(Reader::ReadException, (GetErrorProlog(), pos));
|
||||
|
||||
size_t const bytesRead = fread(p, 1, size, m_File);
|
||||
if (bytesRead != size || ferror(m_File))
|
||||
MYTHROW(Reader::ReadException, (GetErrorProlog(), bytesRead, pos, size));
|
||||
}
|
||||
|
||||
uint64_t FileData::Pos() const
|
||||
{
|
||||
int64_t const pos = ftell64(m_File);
|
||||
if (pos == INVALID_POS)
|
||||
MYTHROW(Writer::PosException, (GetErrorProlog(), pos));
|
||||
|
||||
ASSERT_GREATER_OR_EQUAL(pos, 0, ());
|
||||
return static_cast<uint64_t>(pos);
|
||||
}
|
||||
|
||||
void FileData::Seek(uint64_t pos)
|
||||
{
|
||||
ASSERT_NOT_EQUAL(m_Op, Op::APPEND, (m_FileName, m_Op, pos));
|
||||
if (fseek64(m_File, static_cast<off_t>(pos), SEEK_SET))
|
||||
MYTHROW(Writer::SeekException, (GetErrorProlog(), pos));
|
||||
}
|
||||
|
||||
void FileData::Write(void const * p, size_t size)
|
||||
{
|
||||
size_t const bytesWritten = fwrite(p, 1, size, m_File);
|
||||
if (bytesWritten != size || ferror(m_File))
|
||||
MYTHROW(Writer::WriteException, (GetErrorProlog(), bytesWritten, size));
|
||||
}
|
||||
|
||||
void FileData::Flush()
|
||||
{
|
||||
if (fflush(m_File))
|
||||
MYTHROW(Writer::WriteException, (GetErrorProlog()));
|
||||
}
|
||||
|
||||
void FileData::Truncate(uint64_t sz)
|
||||
{
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
int const res = _chsize(fileno(m_File), sz);
|
||||
#else
|
||||
int const res = ftruncate(fileno(m_File), static_cast<off_t>(sz));
|
||||
#endif
|
||||
|
||||
if (res)
|
||||
MYTHROW(Writer::WriteException, (GetErrorProlog(), sz));
|
||||
}
|
||||
|
||||
// Stores the size of the file |fName| in |sz|.
// Returns false, leaving |sz| untouched, if the file cannot be opened for reading
// or its size cannot be determined.
bool GetFileSize(string const & fName, uint64_t & sz)
{
  try
  {
    FileData file(fName, FileData::Op::READ);
    sz = file.Size();
  }
  catch (RootException const &)
  {
    // Suppress all exceptions here: failure is reported through the return value.
    return false;
  }
  return true;
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
bool CheckFileOperationResult(int res, string const & fName)
|
||||
{
|
||||
if (!res)
|
||||
return true;
|
||||
|
||||
LOG(LWARNING, ("File operation error for file:", fName, "-", strerror(errno)));
|
||||
|
||||
// additional check if file really was removed correctly
|
||||
uint64_t dummy;
|
||||
if (GetFileSize(fName, dummy))
|
||||
LOG(LERROR, ("File exists but can't be deleted. Sharing violation?", fName));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true when peeking the next character of |fs| yields end-of-file.
bool IsEOF(ifstream & fs)
{
  auto const next = fs.peek();
  return next == ifstream::traits_type::eof();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Removes the file |fName|. Returns true on success; failures are logged.
bool DeleteFileX(string const & fName)
{
  return CheckFileOperationResult(remove(fName.c_str()), fName);
}
|
||||
|
||||
// Renames |fOld| to |fNew|. Returns true on success; failures are logged against |fOld|.
bool RenameFileX(string const & fOld, string const & fNew)
{
  return CheckFileOperationResult(rename(fOld.c_str(), fNew.c_str()), fOld);
}
|
||||
|
||||
// Moves |fOld| to |fNew|: tries a plain rename first and falls back to copy + delete.
// Returns false if the copy fails; in that case |fNew| is removed again.
// The result of deleting |fOld| after a successful copy is ignored.
bool MoveFileX(string const & fOld, string const & fNew)
{
  // Try to rename the file first.
  if (rename(fOld.c_str(), fNew.c_str()) == 0)
    return true;

  // Otherwise perform the full move.
  if (CopyFileX(fOld, fNew))
  {
    (void)DeleteFileX(fOld);
    return true;
  }

  (void)DeleteFileX(fNew);
  return false;
}
|
||||
|
||||
// Lets |write| produce a temporary file and then renames it to |dest|.
// The temporary name is |tmp| if given, otherwise |dest| + ".tmp" + the current thread id.
// If writing or renaming fails, the error is logged, the temporary file is deleted and
// false is returned.
bool WriteToTempAndRenameToFile(string const & dest, function<bool(string const &)> const & write, string const & tmp)
{
  string tmpFileName = tmp;
  if (tmp.empty())
    tmpFileName = dest + ".tmp" + strings::to_string(this_thread::get_id());

  bool succeeded = write(tmpFileName);
  if (!succeeded)
  {
    LOG(LERROR, ("Can't write to", tmpFileName));
  }
  else
  {
    succeeded = RenameFileX(tmpFileName, dest);
    if (!succeeded)
      LOG(LERROR, ("Can't rename file", tmpFileName, "to", dest));
  }

  if (!succeeded)
    DeleteFileX(tmpFileName);
  return succeeded;
}
|
||||
|
||||
// Appends the contents of |fromFilename| to the end of |toFilename|; both are opened in binary mode.
// The input stream throws on failbit and badbit, the output stream on badbit.
// Nothing is written when the source file is empty.
void AppendFileToFile(string const & fromFilename, string const & toFilename)
{
  ifstream source;
  source.exceptions(fstream::failbit | fstream::badbit);
  source.open(fromFilename, ios::binary);

  ofstream target;
  target.exceptions(fstream::badbit);
  target.open(toFilename, ios::binary | ios::app);

  if (IsEOF(source))
    return;

  target << source.rdbuf();
}
|
||||
|
||||
// Copies the file |fOld| to |fNew| byte for byte.
// Returns true on success; an empty source produces an empty destination.
// On failure the error is logged, |fNew| is removed and false is returned. Stream errors are
// caught here and not propagated to the caller.
bool CopyFileX(string const & fOld, string const & fNew)
{
  ifstream ifs;
  ofstream ofs;
  ifs.exceptions(ifstream::failbit | ifstream::badbit);
  ofs.exceptions(ifstream::failbit | ifstream::badbit);

  try
  {
    // Binary mode keeps the data intact on platforms where text mode translates
    // line endings, and matches what AppendFileToFile does.
    ifs.open(fOld.c_str(), ios::binary);
    ofs.open(fNew.c_str(), ios::binary);

    // If source file is empty - make empty dest file without any errors.
    if (IsEOF(ifs))
      return true;

    ofs << ifs.rdbuf();
    ofs.flush();
    return true;
  }
  catch (system_error const &)
  {
    LOG(LWARNING, ("Failed to copy file from", fOld, "to", fNew, ":", strerror(errno)));
  }
  catch (exception const &)
  {
    LOG(LERROR, ("Unknown error when coping files:", fOld, "to", fNew, strerror(errno)));
  }

  // Don't care about possible error here ..
  (void)DeleteFileX(fNew);
  return false;
}
|
||||
|
||||
// Returns true if both files have the same size and the same contents.
// The files are compared chunk by chunk, READ_FILE_BUFFER_SIZE bytes at a time.
// Exceptions from opening or reading either file propagate to the caller.
bool IsEqualFiles(string const & firstFile, string const & secondFile)
{
  FileData first(firstFile, FileData::Op::READ);
  FileData second(secondFile, FileData::Op::READ);

  // Keep sizes and offsets 64-bit: narrowing them to size_t would compare only a part
  // of a large file on 32-bit targets.
  uint64_t const fileSize = first.Size();
  if (fileSize != second.Size())
    return false;

  size_t constexpr bufSize = READ_FILE_BUFFER_SIZE;
  vector<char> buf1(bufSize);
  vector<char> buf2(bufSize);
  uint64_t currSize = 0;

  while (currSize < fileSize)
  {
    size_t const toRead = static_cast<size_t>(min<uint64_t>(bufSize, fileSize - currSize));

    first.Read(currSize, buf1.data(), toRead);
    second.Read(currSize, buf2.data(), toRead);

    // Compare only the bytes that were just read, not the whole buffers.
    if (memcmp(buf1.data(), buf2.data(), toRead) != 0)
      return false;

    currSize += toRead;
  }

  return true;
}
|
||||
|
||||
std::vector<uint8_t> ReadFile(std::string const & filePath)
|
||||
{
|
||||
FileData file(filePath, FileData::Op::READ);
|
||||
uint64_t const sz = file.Size();
|
||||
std::vector<uint8_t> contents(sz);
|
||||
file.Read(0, contents.data(), sz);
|
||||
return contents;
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
69
libs/coding/internal/file_data.hpp
Normal file
69
libs/coding/internal/file_data.hpp
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
#pragma once
|
||||
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
namespace base
|
||||
{
|
||||
class FileData
|
||||
{
|
||||
public:
|
||||
/// @note Do not change order (@see FileData::FileData).
|
||||
enum class Op
|
||||
{
|
||||
READ = 0,
|
||||
WRITE_TRUNCATE,
|
||||
WRITE_EXISTING,
|
||||
APPEND
|
||||
};
|
||||
|
||||
FileData(std::string const & fileName, Op op);
|
||||
~FileData();
|
||||
|
||||
uint64_t Size() const;
|
||||
uint64_t Pos() const;
|
||||
|
||||
void Seek(uint64_t pos);
|
||||
|
||||
void Read(uint64_t pos, void * p, size_t size);
|
||||
void Write(void const * p, size_t size);
|
||||
|
||||
void Flush();
|
||||
void Truncate(uint64_t sz);
|
||||
|
||||
std::string const & GetName() const { return m_FileName; }
|
||||
|
||||
private:
|
||||
FILE * m_File;
|
||||
std::string const m_FileName;
|
||||
Op const m_Op;
|
||||
|
||||
std::string GetErrorProlog() const;
|
||||
|
||||
DISALLOW_COPY(FileData);
|
||||
};
|
||||
|
||||
bool GetFileSize(std::string const & fName, uint64_t & sz);
|
||||
bool DeleteFileX(std::string const & fName);
|
||||
bool RenameFileX(std::string const & fOld, std::string const & fNew);
|
||||
|
||||
/// Write to temp file and rename it to dest. Delete temp on failure.
|
||||
/// @param write function that writes to file with a given name, returns true on success.
|
||||
bool WriteToTempAndRenameToFile(std::string const & dest, std::function<bool(std::string const &)> const & write,
|
||||
std::string const & tmp = "");
|
||||
|
||||
void AppendFileToFile(std::string const & fromFilename, std::string const & toFilename);
|
||||
|
||||
/// @return false if copy fails. DOES NOT THROW exceptions
|
||||
bool CopyFileX(std::string const & fOld, std::string const & fNew);
|
||||
/// @return false if moving fails. DOES NOT THROW exceptions
|
||||
bool MoveFileX(std::string const & fOld, std::string const & fNew);
|
||||
bool IsEqualFiles(std::string const & firstFile, std::string const & secondFile);
|
||||
|
||||
std::vector<uint8_t> ReadFile(std::string const & filePath);
|
||||
|
||||
} // namespace base
|
||||
162
libs/coding/internal/xmlparser.hpp
Normal file
162
libs/coding/internal/xmlparser.hpp
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
#pragma once
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
#endif
|
||||
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#ifndef XML_STATIC
|
||||
#define XML_STATIC
|
||||
#endif
|
||||
#include <expat.h>
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
/// Dispatcher's methods Push, Pop and AddAttr can conveniently take different parameters:
|
||||
/// 1. char const * (no any overhead, is called by the Expat)
|
||||
/// 2. std::string or std::string const & (temporary std::string is created from char const *)
|
||||
/// 3. std::string_view (created from char const *)
|
||||
///
|
||||
/// CharData accepts std::string const & or std::string & to modify the data before consumption.
|
||||
template <typename DispatcherT>
|
||||
class XmlParser
|
||||
{
|
||||
public:
|
||||
explicit XmlParser(DispatcherT & dispatcher, bool enableCharHandler = false)
|
||||
: m_depth(0)
|
||||
, m_restrictDepth(static_cast<size_t>(-1))
|
||||
, m_dispatcher(dispatcher)
|
||||
, m_enableCharHandler(enableCharHandler)
|
||||
, m_parser(std::unique_ptr<XML_ParserStruct, decltype(&XML_ParserFree)>(XML_ParserCreate(nullptr /* encoding */),
|
||||
&XML_ParserFree))
|
||||
{
|
||||
CHECK(m_parser, ());
|
||||
OnPostCreate();
|
||||
}
|
||||
|
||||
static void StartElementHandler(void * userData, XML_Char const * name, XML_Char const ** attrs)
|
||||
{
|
||||
CHECK(userData, (name));
|
||||
auto * xmlParser = static_cast<XmlParser *>(userData);
|
||||
xmlParser->OnStartElement(name, attrs);
|
||||
}
|
||||
|
||||
static void EndElementHandler(void * userData, XML_Char const * name)
|
||||
{
|
||||
CHECK(userData, (name));
|
||||
auto * xmlParser = static_cast<XmlParser *>(userData);
|
||||
xmlParser->OnEndElement(name);
|
||||
}
|
||||
|
||||
static void CharacterDataHandler(void * userData, XML_Char const * data, int length)
|
||||
{
|
||||
CHECK(userData, (data));
|
||||
auto * xmlParser = static_cast<XmlParser *>(userData);
|
||||
xmlParser->OnCharacterData(data, length);
|
||||
}
|
||||
|
||||
void * GetBuffer(int len)
|
||||
{
|
||||
CHECK(m_parser, ());
|
||||
return XML_GetBuffer(m_parser.get(), len);
|
||||
}
|
||||
|
||||
XML_Status ParseBuffer(int len, int isFinal)
|
||||
{
|
||||
CHECK(m_parser, ());
|
||||
return XML_ParseBuffer(m_parser.get(), len, isFinal);
|
||||
}
|
||||
|
||||
void OnPostCreate()
|
||||
{
|
||||
CHECK(m_parser, ());
|
||||
// Enable all the event routines we want
|
||||
XML_SetStartElementHandler(m_parser.get(), StartElementHandler);
|
||||
XML_SetEndElementHandler(m_parser.get(), EndElementHandler);
|
||||
if (m_enableCharHandler)
|
||||
XML_SetCharacterDataHandler(m_parser.get(), CharacterDataHandler);
|
||||
|
||||
XML_SetUserData(m_parser.get(), static_cast<void *>(this));
|
||||
}
|
||||
|
||||
using StringPtrT = XML_Char const *;
|
||||
|
||||
// Start element handler
|
||||
void OnStartElement(StringPtrT name, StringPtrT * attrs)
|
||||
{
|
||||
CheckCharData();
|
||||
|
||||
++m_depth;
|
||||
if (m_depth >= m_restrictDepth)
|
||||
return;
|
||||
|
||||
if (!m_dispatcher.Push(name))
|
||||
{
|
||||
m_restrictDepth = m_depth;
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t i = 0; attrs[2 * i]; ++i)
|
||||
m_dispatcher.AddAttr(attrs[2 * i], attrs[2 * i + 1]);
|
||||
}
|
||||
|
||||
// End element handler
|
||||
void OnEndElement(StringPtrT name)
|
||||
{
|
||||
CheckCharData();
|
||||
|
||||
--m_depth;
|
||||
if (m_depth >= m_restrictDepth)
|
||||
return;
|
||||
|
||||
if (m_restrictDepth != size_t(-1))
|
||||
m_restrictDepth = static_cast<size_t>(-1);
|
||||
else
|
||||
m_dispatcher.Pop(name);
|
||||
}
|
||||
|
||||
void OnCharacterData(XML_Char const * data, int length)
|
||||
{
|
||||
// Accumulate character data - it can be passed by parts
|
||||
// (when reading from fixed length buffer).
|
||||
m_charData.append(data, length);
|
||||
}
|
||||
|
||||
std::string GetErrorMessage()
|
||||
{
|
||||
if (XML_GetErrorCode(m_parser.get()) == XML_ERROR_NONE)
|
||||
return {};
|
||||
|
||||
std::stringstream s;
|
||||
s << "XML parse error at line " << XML_GetCurrentLineNumber(m_parser.get()) << " and byte "
|
||||
<< XML_GetCurrentByteIndex(m_parser.get());
|
||||
return s.str();
|
||||
}
|
||||
|
||||
private:
|
||||
size_t m_depth;
|
||||
size_t m_restrictDepth;
|
||||
DispatcherT & m_dispatcher;
|
||||
|
||||
std::string m_charData;
|
||||
bool m_enableCharHandler;
|
||||
std::unique_ptr<XML_ParserStruct, decltype(&XML_ParserFree)> m_parser;
|
||||
|
||||
void CheckCharData()
|
||||
{
|
||||
if (m_enableCharHandler && !m_charData.empty())
|
||||
{
|
||||
m_dispatcher.CharData(m_charData);
|
||||
m_charData.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
312
libs/coding/map_uint32_to_val.hpp
Normal file
312
libs/coding/map_uint32_to_val.hpp
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/files_container.hpp"
|
||||
#include "coding/memory_region.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/succinct_mapper.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/checked_cast.hpp"
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wunused-private-field"
|
||||
#endif
|
||||
|
||||
#include "3party/succinct/elias_fano.hpp"
|
||||
#include "3party/succinct/rs_bit_vector.hpp"
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
// A data structure that allows storing a map from small 32-bit integers (the main use
|
||||
// case is feature ids of a single mwm) to arbitrary values and accessing this map
|
||||
// with a small RAM footprint.
|
||||
//
|
||||
// Format:
|
||||
// File offset (bytes) Field name Field size (bytes)
|
||||
// 0 version 2
|
||||
// 2 block size 2
|
||||
// 4 positions offset 4
|
||||
// 8 variables offset 4
|
||||
// 12 end of section 4
|
||||
// 16 identifiers table positions offset - 16
|
||||
// positions offset positions table variables offset - positions offset
|
||||
// variables offset variables blocks end of section - variables offset
|
||||
//
|
||||
// Identifiers table is a bit-vector with rank-select table, where set
|
||||
// bits denote that values for the corresponding features are in the
|
||||
// table. Identifiers table is stored in the native endianness.
|
||||
//
|
||||
// Positions table is an Elias-Fano table where each entry corresponds
|
||||
// to the start position of the variables block.
|
||||
//
|
||||
// Variables is a sequence of blocks, where each block (with the
|
||||
// exception of the last one) is a sequence of kBlockSize variables
|
||||
// encoded by block encoding callback.
|
||||
//
|
||||
// On Get call m_blockSize consecutive variables are decoded and cached in RAM.
|
||||
|
||||
template <typename Value>
|
||||
class MapUint32ToValue
|
||||
{
|
||||
// 0 - initial version.
|
||||
// 1 - added m_blockSize instead of m_endianess.
|
||||
static uint16_t constexpr kLastVersion = 1;
|
||||
|
||||
public:
|
||||
using ReadBlockCallback = std::function<void(NonOwningReaderSource &, uint32_t, std::vector<Value> &)>;
|
||||
|
||||
struct Header
|
||||
{
|
||||
uint16_t Read(Reader & reader)
|
||||
{
|
||||
NonOwningReaderSource source(reader);
|
||||
auto const version = ReadPrimitiveFromSource<uint16_t>(source);
|
||||
m_blockSize = ReadPrimitiveFromSource<uint16_t>(source);
|
||||
if (version == 0)
|
||||
m_blockSize = 64;
|
||||
|
||||
m_positionsOffset = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
m_variablesOffset = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
m_endOffset = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
return version;
|
||||
}
|
||||
|
||||
void Write(Writer & writer)
|
||||
{
|
||||
WriteToSink(writer, kLastVersion);
|
||||
WriteToSink(writer, m_blockSize);
|
||||
WriteToSink(writer, m_positionsOffset);
|
||||
WriteToSink(writer, m_variablesOffset);
|
||||
WriteToSink(writer, m_endOffset);
|
||||
}
|
||||
|
||||
uint16_t m_blockSize = 0;
|
||||
uint32_t m_positionsOffset = 0;
|
||||
uint32_t m_variablesOffset = 0;
|
||||
uint32_t m_endOffset = 0;
|
||||
};
|
||||
|
||||
MapUint32ToValue(Reader & reader, ReadBlockCallback const & readBlockCallback)
|
||||
: m_reader(reader)
|
||||
, m_readBlockCallback(readBlockCallback)
|
||||
{}
|
||||
|
||||
/// @name Tries to get |value| for key identified by |id|.
|
||||
/// @returns false if table does not have entry for this id.
|
||||
/// @{
|
||||
[[nodiscard]] bool Get(uint32_t id, Value & value)
|
||||
{
|
||||
if (id >= m_ids.size() || !m_ids[id])
|
||||
return false;
|
||||
|
||||
uint32_t const rank = static_cast<uint32_t>(m_ids.rank(id));
|
||||
uint32_t const base = rank / m_header.m_blockSize;
|
||||
uint32_t const offset = rank % m_header.m_blockSize;
|
||||
|
||||
auto & entry = m_cache[base];
|
||||
if (entry.empty())
|
||||
entry = GetImpl(rank, m_header.m_blockSize);
|
||||
|
||||
value = entry[offset];
|
||||
return true;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool GetThreadsafe(uint32_t id, Value & value) const
|
||||
{
|
||||
if (id >= m_ids.size() || !m_ids[id])
|
||||
return false;
|
||||
|
||||
uint32_t const rank = static_cast<uint32_t>(m_ids.rank(id));
|
||||
uint32_t const offset = rank % m_header.m_blockSize;
|
||||
|
||||
auto const entry = GetImpl(rank, offset + 1);
|
||||
|
||||
value = entry[offset];
|
||||
return true;
|
||||
}
|
||||
/// @}
|
||||
|
||||
// Loads MapUint32ToValue instance. Note that |reader| must be alive
|
||||
// until the destruction of loaded table. Returns nullptr if
|
||||
// MapUint32ToValue can't be loaded.
|
||||
// It's guaranteed that |readBlockCallback| will not be called for empty block.
|
||||
static std::unique_ptr<MapUint32ToValue> Load(Reader & reader, ReadBlockCallback const & readBlockCallback)
|
||||
{
|
||||
auto table = std::make_unique<MapUint32ToValue>(reader, readBlockCallback);
|
||||
if (!table->Init())
|
||||
return {};
|
||||
return table;
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void ForEach(Fn && fn)
|
||||
{
|
||||
for (uint64_t i = 0; i < m_ids.num_ones(); ++i)
|
||||
{
|
||||
auto const j = static_cast<uint32_t>(m_ids.select(i));
|
||||
Value value;
|
||||
CHECK(Get(j, value), (i, j));
|
||||
fn(j, value);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t Count() const { return m_ids.num_ones(); }
|
||||
|
||||
private:
|
||||
/// @param[in] upperSize Read until this size. Can be one of: \n
|
||||
/// - m_header.m_blockSize for the regular Get version with cache \n
|
||||
/// - index + 1 for the GetThreadsafe version without cache, to break when needed element is readed \n
|
||||
std::vector<Value> GetImpl(uint32_t rank, uint32_t upperSize) const
|
||||
{
|
||||
uint32_t const base = rank / m_header.m_blockSize;
|
||||
auto const start = m_offsets.select(base);
|
||||
auto const end = base + 1 < m_offsets.num_ones() ? m_offsets.select(base + 1) + m_header.m_variablesOffset
|
||||
: m_header.m_endOffset;
|
||||
NonOwningReaderSource src(m_reader, m_header.m_variablesOffset + start, end);
|
||||
|
||||
// Important! Client should read while src.Size() > 0 and max |upperSize| number of elements.
|
||||
std::vector<Value> values;
|
||||
m_readBlockCallback(src, upperSize, values);
|
||||
return values;
|
||||
}
|
||||
|
||||
bool Init()
|
||||
{
|
||||
auto const version = m_header.Read(m_reader);
|
||||
if (version > kLastVersion)
|
||||
{
|
||||
LOG(LERROR, ("Unsupported version =", version, "Last known version =", kLastVersion));
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
uint32_t const idsSize = m_header.m_positionsOffset - sizeof(m_header);
|
||||
std::vector<uint8_t> data(idsSize);
|
||||
m_reader.Read(sizeof(m_header), data.data(), data.size());
|
||||
m_idsRegion = std::make_unique<CopiedMemoryRegion>(std::move(data));
|
||||
|
||||
coding::MapVisitor visitor(m_idsRegion->ImmutableData());
|
||||
m_ids.map(visitor);
|
||||
}
|
||||
|
||||
{
|
||||
uint32_t const offsetsSize = m_header.m_variablesOffset - m_header.m_positionsOffset;
|
||||
std::vector<uint8_t> data(offsetsSize);
|
||||
m_reader.Read(m_header.m_positionsOffset, data.data(), data.size());
|
||||
m_offsetsRegion = std::make_unique<CopiedMemoryRegion>(std::move(data));
|
||||
|
||||
coding::MapVisitor visitor(m_offsetsRegion->ImmutableData());
|
||||
m_offsets.map(visitor);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
Header m_header;
|
||||
Reader & m_reader;
|
||||
|
||||
std::unique_ptr<CopiedMemoryRegion> m_idsRegion;
|
||||
std::unique_ptr<CopiedMemoryRegion> m_offsetsRegion;
|
||||
|
||||
succinct::rs_bit_vector m_ids;
|
||||
succinct::elias_fano m_offsets;
|
||||
|
||||
ReadBlockCallback m_readBlockCallback;
|
||||
|
||||
std::unordered_map<uint32_t, std::vector<Value>> m_cache;
|
||||
};
|
||||
|
||||
template <typename Value>
|
||||
class MapUint32ToValueBuilder
|
||||
{
|
||||
public:
|
||||
using Iter = typename std::vector<Value>::const_iterator;
|
||||
using WriteBlockCallback = std::function<void(Writer &, Iter, Iter)>;
|
||||
using Map = MapUint32ToValue<Value>;
|
||||
|
||||
void Put(uint32_t id, Value value)
|
||||
{
|
||||
if (!m_ids.empty())
|
||||
CHECK_LESS(m_ids.back(), id, ());
|
||||
|
||||
m_values.push_back(value);
|
||||
m_ids.push_back(id);
|
||||
}
|
||||
|
||||
// It's guaranteed that |writeBlockCallback| will not be called for empty block.
|
||||
template <class WriterT>
|
||||
void Freeze(WriterT & writer, WriteBlockCallback const & writeBlockCallback, uint16_t blockSize = 64) const
|
||||
{
|
||||
typename Map::Header header;
|
||||
header.m_blockSize = blockSize;
|
||||
|
||||
auto const startOffset = writer.Pos();
|
||||
header.Write(writer);
|
||||
|
||||
{
|
||||
uint64_t const numBits = m_ids.empty() ? 0 : m_ids.back() + 1;
|
||||
|
||||
succinct::bit_vector_builder builder(numBits);
|
||||
for (auto const & id : m_ids)
|
||||
builder.set(id, true);
|
||||
|
||||
coding::FreezeVisitor<WriterT> visitor(writer);
|
||||
succinct::rs_bit_vector(&builder).map(visitor);
|
||||
}
|
||||
|
||||
std::vector<uint32_t> offsets;
|
||||
std::vector<uint8_t> variables;
|
||||
|
||||
{
|
||||
MemWriter<std::vector<uint8_t>> writer(variables);
|
||||
for (size_t i = 0; i < m_values.size(); i += blockSize)
|
||||
{
|
||||
offsets.push_back(static_cast<uint32_t>(variables.size()));
|
||||
|
||||
auto const endOffset = std::min(i + blockSize, m_values.size());
|
||||
CHECK_GREATER(endOffset, i, ());
|
||||
writeBlockCallback(writer, m_values.cbegin() + i, m_values.cbegin() + endOffset);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
succinct::elias_fano::elias_fano_builder builder(offsets.empty() ? 0 : offsets.back() + 1, offsets.size());
|
||||
for (auto const & offset : offsets)
|
||||
builder.push_back(offset);
|
||||
|
||||
header.m_positionsOffset = base::checked_cast<uint32_t>(writer.Pos() - startOffset);
|
||||
coding::FreezeVisitor<WriterT> visitor(writer);
|
||||
succinct::elias_fano(&builder).map(visitor);
|
||||
}
|
||||
|
||||
{
|
||||
header.m_variablesOffset = base::checked_cast<uint32_t>(writer.Pos() - startOffset);
|
||||
writer.Write(variables.data(), variables.size());
|
||||
header.m_endOffset = base::checked_cast<uint32_t>(writer.Pos() - startOffset);
|
||||
}
|
||||
|
||||
auto const endOffset = writer.Pos();
|
||||
|
||||
writer.Seek(startOffset);
|
||||
header.Write(writer);
|
||||
writer.Seek(endOffset);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<Value> m_values;
|
||||
std::vector<uint32_t> m_ids;
|
||||
};
|
||||
50
libs/coding/memory_region.hpp
Normal file
50
libs/coding/memory_region.hpp
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/files_container.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
/// Interface of a read-only contiguous block of bytes.
class MemoryRegion
{
public:
  virtual ~MemoryRegion() = default;

  /// @return Size of the region in bytes.
  virtual uint64_t Size() const = 0;

  /// @return Pointer to the first byte of the region.
  virtual uint8_t const * ImmutableData() const = 0;
};
|
||||
|
||||
class MappedMemoryRegion : public MemoryRegion
|
||||
{
|
||||
public:
|
||||
explicit MappedMemoryRegion(FilesMappingContainer::Handle && handle) : m_handle(std::move(handle)) {}
|
||||
|
||||
// MemoryRegion overrides:
|
||||
uint64_t Size() const override { return m_handle.GetSize(); }
|
||||
uint8_t const * ImmutableData() const override { return m_handle.GetData<uint8_t>(); }
|
||||
|
||||
private:
|
||||
FilesMappingContainer::Handle m_handle;
|
||||
|
||||
DISALLOW_COPY(MappedMemoryRegion);
|
||||
};
|
||||
|
||||
class CopiedMemoryRegion : public MemoryRegion
|
||||
{
|
||||
public:
|
||||
explicit CopiedMemoryRegion(std::vector<uint8_t> && buffer) : m_buffer(std::move(buffer)) {}
|
||||
|
||||
// MemoryRegion overrides:
|
||||
uint64_t Size() const override { return m_buffer.size(); }
|
||||
uint8_t const * ImmutableData() const override { return m_buffer.data(); }
|
||||
|
||||
uint8_t * MutableData() { return m_buffer.data(); }
|
||||
|
||||
private:
|
||||
std::vector<uint8_t> m_buffer;
|
||||
|
||||
DISALLOW_COPY(CopiedMemoryRegion);
|
||||
};
|
||||
146
libs/coding/mmap_reader.cpp
Normal file
146
libs/coding/mmap_reader.cpp
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
#include "coding/mmap_reader.hpp"
|
||||
|
||||
#include "base/scope_guard.hpp"
|
||||
|
||||
#include "std/target_os.hpp"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
#include "std/windows.hpp"
|
||||
#else
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
class MmapReader::MmapData
|
||||
{
|
||||
public:
|
||||
explicit MmapData(std::string const & fileName, Advice advice)
|
||||
{
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
m_hFile = CreateFileA(fileName.c_str(), GENERIC_READ, 0, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
|
||||
if (m_hFile == INVALID_HANDLE_VALUE)
|
||||
MYTHROW(Reader::OpenException, ("Can't open file:", fileName, "win last error:", GetLastError()));
|
||||
|
||||
SCOPE_GUARD(fileGuard, [this] { CloseHandle(m_hFile); });
|
||||
|
||||
m_hMapping = CreateFileMappingA(m_hFile, nullptr, PAGE_READONLY, 0, 0, nullptr);
|
||||
if (!m_hMapping)
|
||||
MYTHROW(Reader::OpenException,
|
||||
("Can't create file's Windows mapping:", fileName, "win last error:", GetLastError()));
|
||||
|
||||
SCOPE_GUARD(mappingGuard, [this] { CloseHandle(m_hMapping); });
|
||||
|
||||
LARGE_INTEGER fileSize;
|
||||
if (!GetFileSizeEx(m_hFile, &fileSize))
|
||||
MYTHROW(Reader::OpenException, ("Can't get file size:", fileName, "win last error:", GetLastError()));
|
||||
|
||||
m_size = fileSize.QuadPart;
|
||||
m_memory = static_cast<uint8_t *>(MapViewOfFile(m_hMapping, FILE_MAP_READ, 0, 0, 0));
|
||||
if (!m_memory)
|
||||
MYTHROW(Reader::OpenException,
|
||||
("Can't create file's Windows mapping:", fileName, "win last error:", GetLastError()));
|
||||
|
||||
mappingGuard.release();
|
||||
fileGuard.release();
|
||||
#else
|
||||
m_fd = open(fileName.c_str(), O_RDONLY | O_NONBLOCK);
|
||||
if (m_fd == -1)
|
||||
MYTHROW(OpenException, ("open failed for file", fileName));
|
||||
|
||||
struct stat s;
|
||||
if (-1 == fstat(m_fd, &s))
|
||||
MYTHROW(OpenException, ("fstat failed for file", fileName));
|
||||
m_size = s.st_size;
|
||||
|
||||
m_memory = static_cast<uint8_t *>(mmap(0, static_cast<size_t>(m_size), PROT_READ, MAP_PRIVATE, m_fd, 0));
|
||||
if (m_memory == MAP_FAILED)
|
||||
{
|
||||
close(m_fd);
|
||||
MYTHROW(OpenException, ("mmap failed for file", fileName));
|
||||
}
|
||||
|
||||
int adv = MADV_NORMAL;
|
||||
switch (advice)
|
||||
{
|
||||
case Advice::Random: adv = MADV_RANDOM; break;
|
||||
case Advice::Sequential: adv = MADV_SEQUENTIAL; break;
|
||||
case Advice::Normal: adv = MADV_NORMAL; break;
|
||||
}
|
||||
|
||||
if (madvise(m_memory, static_cast<size_t>(s.st_size), adv) != 0)
|
||||
LOG(LWARNING, ("madvise error:", strerror(errno)));
|
||||
#endif
|
||||
}
|
||||
|
||||
~MmapData()
|
||||
{
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
UnmapViewOfFile(m_memory);
|
||||
|
||||
CloseHandle(m_hMapping);
|
||||
CloseHandle(m_hFile);
|
||||
#else
|
||||
munmap(m_memory, static_cast<size_t>(m_size));
|
||||
close(m_fd);
|
||||
#endif
|
||||
}
|
||||
|
||||
uint8_t * m_memory = nullptr;
|
||||
uint64_t m_size = 0;
|
||||
|
||||
private:
|
||||
#ifdef OMIM_OS_WINDOWS
|
||||
HANDLE m_hFile;
|
||||
HANDLE m_hMapping;
|
||||
#else
|
||||
int m_fd = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// Maps the whole file |fileName|; the reader initially covers all of it.
MmapReader::MmapReader(std::string const & fileName, Advice advice)
  : base_type(fileName)
  , m_data(std::make_shared<MmapData>(fileName, advice))
  , m_offset(0)
  // m_data is declared before m_size, so it is already initialized here.
  , m_size(m_data->m_size)
{}
|
||||
|
||||
/// Creates a reader over the window [offset, offset + size) of the mapping shared with |reader|.
MmapReader::MmapReader(MmapReader const & reader, uint64_t offset, uint64_t size)
  : base_type(reader.GetName())
  , m_data(reader.m_data)  // Shares the mapping; no new mmap is created.
  , m_offset(offset)
  , m_size(size)
{}
|
||||
|
||||
uint64_t MmapReader::Size() const
|
||||
{
|
||||
return m_size;
|
||||
}
|
||||
|
||||
void MmapReader::Read(uint64_t pos, void * p, size_t size) const
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(pos + size, Size(), (pos, size));
|
||||
memcpy(p, m_data->m_memory + m_offset + pos, size);
|
||||
}
|
||||
|
||||
std::unique_ptr<Reader> MmapReader::CreateSubReader(uint64_t pos, uint64_t size) const
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(pos + size, Size(), (pos, size));
|
||||
// Can't use make_unique with private constructor.
|
||||
return std::unique_ptr<Reader>(new MmapReader(*this, m_offset + pos, size));
|
||||
}
|
||||
|
||||
uint8_t * MmapReader::Data() const
|
||||
{
|
||||
return m_data->m_memory;
|
||||
}
|
||||
|
||||
void MmapReader::SetOffsetAndSize(uint64_t offset, uint64_t size)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(offset + size, Size(), (offset, size));
|
||||
m_offset = offset;
|
||||
m_size = size;
|
||||
}
|
||||
43
libs/coding/mmap_reader.hpp
Normal file
43
libs/coding/mmap_reader.hpp
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
/// @TODO Add Windows support
|
||||
class MmapReader : public ModelReader
|
||||
{
|
||||
public:
|
||||
enum class Advice
|
||||
{
|
||||
Normal,
|
||||
Random,
|
||||
Sequential
|
||||
};
|
||||
|
||||
explicit MmapReader(std::string const & fileName, Advice advice = Advice::Normal);
|
||||
|
||||
uint64_t Size() const override;
|
||||
void Read(uint64_t pos, void * p, size_t size) const override;
|
||||
std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const override;
|
||||
|
||||
/// Direct file/memory access
|
||||
uint8_t * Data() const;
|
||||
|
||||
protected:
|
||||
// Used in special derived readers.
|
||||
void SetOffsetAndSize(uint64_t offset, uint64_t size);
|
||||
|
||||
private:
|
||||
using base_type = ModelReader;
|
||||
class MmapData;
|
||||
|
||||
MmapReader(MmapReader const & reader, uint64_t offset, uint64_t size);
|
||||
|
||||
std::shared_ptr<MmapData> m_data;
|
||||
uint64_t m_offset;
|
||||
uint64_t m_size;
|
||||
};
|
||||
28
libs/coding/move_to_front.cpp
Normal file
28
libs/coding/move_to_front.cpp
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#include "coding/move_to_front.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <numeric>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
/// Starts from the identity order: m_order[i] == i for every byte value.
MoveToFront::MoveToFront()
{
  uint8_t next = 0;
  for (auto & slot : m_order)
    slot = next++;
}
|
||||
|
||||
uint8_t MoveToFront::Transform(uint8_t b)
|
||||
{
|
||||
auto const it = std::find(m_order.begin(), m_order.end(), b);
|
||||
ASSERT(it != m_order.end(), ());
|
||||
|
||||
size_t const result = std::distance(m_order.begin(), it);
|
||||
ASSERT_LESS(result, kNumBytes, ());
|
||||
|
||||
std::rotate(m_order.begin(), it, it + 1);
|
||||
ASSERT_EQUAL(m_order[0], b, ());
|
||||
return static_cast<uint8_t>(result);
|
||||
}
|
||||
} // namespace coding
|
||||
26
libs/coding/move_to_front.hpp
Normal file
26
libs/coding/move_to_front.hpp
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
/// Move-to-front transform state over all byte values.
class MoveToFront
{
public:
  /// Number of distinct byte values.
  static size_t constexpr kNumBytes = size_t{std::numeric_limits<uint8_t>::max()} + 1;

  MoveToFront();

  // Returns index of the byte |b| in the current sequence of bytes,
  // then moves |b| to the first position.
  uint8_t Transform(uint8_t b);

  /// @return The byte currently stored at position |i|.
  uint8_t operator[](uint8_t i) const { return m_order[i]; }

private:
  // Current order of the bytes.
  std::array<uint8_t, kNumBytes> m_order;
};
|
||||
} // namespace coding
|
||||
84
libs/coding/parse_xml.hpp
Normal file
84
libs/coding/parse_xml.hpp
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/internal/xmlparser.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/exception.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <exception>
|
||||
|
||||
DECLARE_EXCEPTION(XmlParseError, RootException);
|
||||
|
||||
template <typename Sequence, typename XMLDispatcher>
|
||||
class XMLSequenceParser
|
||||
{
|
||||
public:
|
||||
XMLSequenceParser(Sequence & source, XMLDispatcher & dispatcher, bool useCharData = false)
|
||||
: m_res(0)
|
||||
, m_numRead(0)
|
||||
, m_source(source)
|
||||
, m_parser(dispatcher, useCharData)
|
||||
{}
|
||||
|
||||
bool Read()
|
||||
{
|
||||
char * buffer = static_cast<char *>(m_parser.GetBuffer(kBufferSize));
|
||||
ASSERT(buffer, ());
|
||||
|
||||
m_numRead = m_source.Read(buffer, kBufferSize);
|
||||
if (m_numRead == 0)
|
||||
return false;
|
||||
|
||||
if (m_parser.ParseBuffer(static_cast<uint32_t>(m_numRead), false) == XML_STATUS_ERROR)
|
||||
MYTHROW(XmlParseError, (m_parser.GetErrorMessage()));
|
||||
|
||||
m_res += m_numRead;
|
||||
return m_numRead == kBufferSize;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t static constexpr kBufferSize = 16 * 1024;
|
||||
|
||||
uint64_t m_res = 0;
|
||||
uint64_t m_numRead = 0;
|
||||
Sequence & m_source;
|
||||
XmlParser<XMLDispatcher> m_parser;
|
||||
};
|
||||
|
||||
/// Adapts a Source with Size()/Read(p, n) to the chunked Read(p, size) interface
/// that returns how many bytes were actually read.
template <class Source>
class SequenceAdapter
{
public:
  SequenceAdapter(Source & source) : m_source(source) {}

  /// Reads at most |size| bytes into |p|, limited by what the source reports via Size().
  /// @return Number of bytes read.
  uint64_t Read(void * p, uint64_t size)
  {
    uint64_t const available = m_source.Size();
    auto const toRead = static_cast<size_t>(available < size ? available : size);
    m_source.Read(p, toRead);
    return toRead;
  }

private:
  Source & m_source;
};
|
||||
|
||||
template <typename XMLDispatcher, typename Source>
|
||||
bool ParseXML(Source & source, XMLDispatcher & dispatcher, bool useCharData = false)
|
||||
{
|
||||
SequenceAdapter<Source> adapter(source);
|
||||
XMLSequenceParser<decltype(adapter), XMLDispatcher> parser(adapter, dispatcher, useCharData);
|
||||
try
|
||||
{
|
||||
while (parser.Read()) /* empty */
|
||||
;
|
||||
}
|
||||
catch (std::exception const & e)
|
||||
{
|
||||
LOG(LWARNING, (e.what()));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
146
libs/coding/point_coding.cpp
Normal file
146
libs/coding/point_coding.cpp
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
#include "coding/point_coding.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/bits.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace
|
||||
{
|
||||
double CoordSize(uint8_t coordBits)
|
||||
{
|
||||
ASSERT(coordBits >= 1 && coordBits <= 32, (coordBits));
|
||||
return static_cast<double>((uint64_t{1} << coordBits) - 1);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
uint32_t DoubleToUint32(double x, double min, double max, uint8_t coordBits)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(min, max, ());
|
||||
|
||||
double const coordSize = CoordSize(coordBits);
|
||||
|
||||
// Expand checks to avoid NANs when min == max.
|
||||
double d;
|
||||
if (x <= min)
|
||||
d = 0;
|
||||
else if (x >= max)
|
||||
d = coordSize;
|
||||
else
|
||||
d = (x - min) / (max - min) * coordSize;
|
||||
|
||||
// Check in case of NANs.
|
||||
ASSERT(d >= 0 && d <= coordSize, (d, x, min, max, coordBits));
|
||||
return static_cast<uint32_t>(0.5 + d);
|
||||
}
|
||||
|
||||
double Uint32ToDouble(uint32_t x, double min, double max, uint8_t coordBits)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(min, max, ());
|
||||
|
||||
double const coordSize = CoordSize(coordBits);
|
||||
auto const d = min + static_cast<double>(x) * (max - min) / coordSize;
|
||||
|
||||
// It doesn't work now because of fancy serialization of m2::DiamondBox.
|
||||
/// @todo Check PathsThroughLayers search test. Refactor CitiesBoundariesSerDes.
|
||||
// ASSERT_LESS_OR_EQUAL(x, coordSize, (d, min, max, coordBits));
|
||||
|
||||
// It doesn't work because of possible floating errors.
|
||||
// ASSERT(d >= min && d <= max, (d, x, min, max, coordBits));
|
||||
|
||||
return math::Clamp(d, min, max);
|
||||
}
|
||||
|
||||
/// Encodes (x, y) into integer coordinates using the mercator bounds as the range.
m2::PointU PointDToPointU(double x, double y, uint8_t coordBits)
{
  using mercator::Bounds;

  uint32_t const ix = DoubleToUint32(x, Bounds::kMinX, Bounds::kMaxX, coordBits);
  uint32_t const iy = DoubleToUint32(y, Bounds::kMinY, Bounds::kMaxY, coordBits);
  return {ix, iy};
}
|
||||
|
||||
/// Convenience overload taking the point as m2::PointD.
m2::PointU PointDToPointU(m2::PointD const & pt, uint8_t coordBits)
{
  double const x = pt.x;
  double const y = pt.y;
  return PointDToPointU(x, y, coordBits);
}
|
||||
|
||||
/// Encodes |pt| into integer coordinates using |limitRect| as the range.
m2::PointU PointDToPointU(m2::PointD const & pt, uint8_t coordBits, m2::RectD const & limitRect)
{
  uint32_t const ix = DoubleToUint32(pt.x, limitRect.minX(), limitRect.maxX(), coordBits);
  uint32_t const iy = DoubleToUint32(pt.y, limitRect.minY(), limitRect.maxY(), coordBits);
  return {ix, iy};
}
|
||||
|
||||
/// Decodes integer coordinates back to doubles using the mercator bounds as the range.
m2::PointD PointUToPointD(m2::PointU const & pt, uint8_t coordBits)
{
  using mercator::Bounds;

  double const x = Uint32ToDouble(pt.x, Bounds::kMinX, Bounds::kMaxX, coordBits);
  double const y = Uint32ToDouble(pt.y, Bounds::kMinY, Bounds::kMaxY, coordBits);
  return {x, y};
}
|
||||
|
||||
// Restores a point from its quantized form using the sides of |limitRect| as the limits of each coordinate.
m2::PointD PointUToPointD(m2::PointU const & pt, uint8_t coordBits, m2::RectD const & limitRect)
{
  double const x = Uint32ToDouble(pt.x, limitRect.minX(), limitRect.maxX(), coordBits);
  double const y = Uint32ToDouble(pt.y, limitRect.minY(), limitRect.maxY(), coordBits);
  return {x, y};
}
|
||||
|
||||
// Finds the smallest number of bits (1..32) for which CoordSize() is not less than the number
// of |accuracy|-sized steps along the longer side of |limitRect|, plus one.
// Returns 0 when even 32 bits are not enough.
uint8_t GetCoordBits(m2::RectD const & limitRect, double accuracy)
{
  auto const range = std::max(limitRect.SizeX(), limitRect.SizeY());
  auto const valuesNumber = 1.0 + range / accuracy;

  uint8_t coordBits = 1;
  while (coordBits <= 32)
  {
    if (CoordSize(coordBits) >= valuesNumber)
      return coordBits;
    ++coordBits;
  }

  // No suitable width was found; the caller has to check for this value.
  return 0;
}
|
||||
|
||||
// Obsolete functions ------------------------------------------------------------------------------
|
||||
|
||||
int64_t PointToInt64Obsolete(double x, double y, uint8_t coordBits)
|
||||
{
|
||||
int64_t const res = static_cast<int64_t>(PointUToUint64Obsolete(PointDToPointU(x, y, coordBits)));
|
||||
|
||||
ASSERT_GREATER_OR_EQUAL(res, 0, ("Highest bits of (ix, iy) are not used, so res should be > 0."));
|
||||
ASSERT_LESS_OR_EQUAL(static_cast<uint64_t>(res), uint64_t{3} << 2 * kPointCoordBits, ());
|
||||
return res;
|
||||
}
|
||||
|
||||
// Convenience overload: forwards the coordinates of |pt| to the (x, y) version.
int64_t PointToInt64Obsolete(m2::PointD const & pt, uint8_t coordBits)
{
  double const x = pt.x;
  double const y = pt.y;
  return PointToInt64Obsolete(x, y, coordBits);
}
|
||||
|
||||
// Inverse of PointToInt64Obsolete(): unpacks |v| into quantized coordinates and
// restores the point from them.
m2::PointD Int64ToPointObsolete(int64_t v, uint8_t coordBits)
{
  // Sanity checks of the packed value.
  ASSERT_GREATER_OR_EQUAL(v, 0, ("Highest bits of (ix, iy) are not used, so res should be > 0."));
  ASSERT_LESS_OR_EQUAL(static_cast<uint64_t>(v), uint64_t{3} << 2 * kPointCoordBits, ());

  m2::PointU const quantized = Uint64ToPointUObsolete(static_cast<uint64_t>(v));
  return PointUToPointD(quantized, coordBits);
}
|
||||
|
||||
std::pair<int64_t, int64_t> RectToInt64Obsolete(m2::RectD const & r, uint8_t coordBits)
|
||||
{
|
||||
int64_t const p1 = PointToInt64Obsolete(r.minX(), r.minY(), coordBits);
|
||||
int64_t const p2 = PointToInt64Obsolete(r.maxX(), r.maxY(), coordBits);
|
||||
return std::make_pair(p1, p2);
|
||||
}
|
||||
|
||||
// Inverse of RectToInt64Obsolete(): restores both packed points and builds a rect from them.
m2::RectD Int64ToRectObsolete(std::pair<int64_t, int64_t> const & p, uint8_t coordBits)
{
  m2::PointD const first = Int64ToPointObsolete(p.first, coordBits);
  m2::PointD const second = Int64ToPointObsolete(p.second, coordBits);
  return m2::RectD(first, second);
}
|
||||
|
||||
// Packs both coordinates of |pt| into one 64-bit value with bits::BitwiseMerge().
uint64_t PointUToUint64Obsolete(m2::PointU const & pt)
{
  uint64_t const res = bits::BitwiseMerge(pt.x, pt.y);

  // The packing must be reversible.
  ASSERT_EQUAL(pt, Uint64ToPointUObsolete(res), ());

  return res;
}
|
||||
|
||||
// Inverse of PointUToUint64Obsolete(): extracts both coordinates from |v| with bits::BitwiseSplit().
m2::PointU Uint64ToPointUObsolete(int64_t v)
{
  m2::PointU unpacked;
  bits::BitwiseSplit(v, unpacked.x, unpacked.y);
  return unpacked;
}
|
||||
92
libs/coding/point_coding.hpp
Normal file
92
libs/coding/point_coding.hpp
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
#pragma once
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
#include "geometry/rect2d.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
|
||||
// Number of bits used to encode a single point coordinate.
uint8_t constexpr kPointCoordBits = 30;

// Number of bits per coordinate used by generator/feature_sorter.cpp (see the note below).
uint8_t constexpr kFeatureSorterPointCoordBits = 27;

// Absolute precision of the point encoding in the mwm files.
// Two points are considered equal if both their x and y coordinates differ by no more than
// |kMwmPointAccuracy|, i.e. the constant may serve as the eps value for both x and y
// in Point::EqualDxDy, AlmostEqualAbs and similar functions.
//
// The value is loosely tied to mercator::Bounds::kRangeX / (1 << kPointCoordBits):
//   range of possible values of a point coordinate:
//     mercator::Bounds::kRangeX = 360.0
//   number of distinct values of each coordinate after encoding:
//     (1 << kPointCoordBits) = 1073741824 ≈ 1e9
//   distance between two discernible points in the uniform case:
//     360.0 / 1e9 ≈ 4e-7 ≈ 0.04 * |kMwmPointAccuracy|.
//
// On the other hand, this precision should be enough for most purposes: a difference of 1e-5
// in the coordinates of a mercator-projected point corresponds to roughly 1 meter on the equator,
// and most OSM points are not expected to be mapped with better precision.
//
// todo(@m) By this argument, it seems that 1e-6 is a better choice.
//
// Note. generator/feature_sorter.cpp uses |kFeatureSorterPointCoordBits|, effectively overriding
// |kPointCoordBits|. Presumably this guarantees a maximum of 4 bytes in the varint encoding:
// (27 + 1 sign(?) bit) / 7 = 4.
// todo(@m) Clarify how kPointCoordBits and kFeatureSorterPointCoordBits are related.
double constexpr kMwmPointAccuracy = 1e-5;
|
||||
|
||||
uint32_t DoubleToUint32(double x, double min, double max, uint8_t coordBits);
|
||||
|
||||
double Uint32ToDouble(uint32_t x, double min, double max, uint8_t coordBits);
|
||||
|
||||
m2::PointU PointDToPointU(double x, double y, uint8_t coordBits);
|
||||
|
||||
m2::PointU PointDToPointU(m2::PointD const & pt, uint8_t coordBits);
|
||||
|
||||
m2::PointU PointDToPointU(m2::PointD const & pt, uint8_t coordBits, m2::RectD const & limitRect);
|
||||
|
||||
m2::PointD PointUToPointD(m2::PointU const & p, uint8_t coordBits);
|
||||
|
||||
m2::PointD PointUToPointD(m2::PointU const & pt, uint8_t coordBits, m2::RectD const & limitRect);
|
||||
|
||||
// Returns coordBits needed to encode point from given rect with given absolute precision.
|
||||
// If 32 bits are not enough returns 0. It's caller's responsibility to check it.
|
||||
uint8_t GetCoordBits(m2::RectD const & limitRect, double accuracy);
|
||||
|
||||
// All functions below are deprecated and are left
|
||||
// only for backward compatibility.
|
||||
//
|
||||
// Their intention was to store a point with unsigned 32-bit integer
|
||||
// coordinates to a signed or to an unsigned 64-bit integer by interleaving the
|
||||
// bits of the point's coordinates.
|
||||
//
|
||||
// A possible reason for interleaving is to lower the number of bytes
|
||||
// needed by the varint encoding, at least if the coordinates are of the
|
||||
// same order of magnitude. However, this is hard to justify:
|
||||
// 1. We have no reason to expect the coordinates to be of the same order.
|
||||
// 2. If you need to serialize a point, doing it separately
|
||||
// for each coordinate is almost always a better option.
|
||||
// 3. If you need to temporarily store the point as an uint,
|
||||
// you do not need the complexity of interleaving.
|
||||
//
|
||||
// By VNG: Well, for polys delta encoding WriteVarUint(BitwiseMerge(x, y)) is better than
|
||||
// WriteVarUint(x) + WriteVarUint(y) by 15%. Check CitiesBoundaries_Compression test with World V0 vs V1.
|
||||
//
|
||||
// Another possible reason to interleave bits of x and y arises
|
||||
// when implementing the Z-order curve but we have this
|
||||
// written elsewhere (see geometry/cellid.hpp).
|
||||
|
||||
int64_t PointToInt64Obsolete(double x, double y, uint8_t coordBits);
|
||||
|
||||
int64_t PointToInt64Obsolete(m2::PointD const & pt, uint8_t coordBits);
|
||||
|
||||
m2::PointD Int64ToPointObsolete(int64_t v, uint8_t coordBits);
|
||||
|
||||
std::pair<int64_t, int64_t> RectToInt64Obsolete(m2::RectD const & r, uint8_t coordBits);
|
||||
|
||||
m2::RectD Int64ToRectObsolete(std::pair<int64_t, int64_t> const & p, uint8_t coordBits);
|
||||
|
||||
uint64_t PointUToUint64Obsolete(m2::PointU const & pt);
|
||||
|
||||
m2::PointU Uint64ToPointUObsolete(int64_t v);
|
||||
162
libs/coding/read_write_utils.hpp
Normal file
162
libs/coding/read_write_utils.hpp
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/varint.hpp"
|
||||
|
||||
#include "base/buffer_vector.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
namespace rw
|
||||
{
|
||||
// Writes an unsigned integer to |sink| with WriteVarUint().
template <typename T, typename TSink>
std::enable_if_t<std::is_integral_v<T> && std::is_unsigned_v<T>, void> Write(TSink & sink, T i)
{
  WriteVarUint(sink, i);
}
|
||||
|
||||
template <class T, class TSource>
|
||||
std::enable_if_t<std::is_integral_v<T> && std::is_unsigned_v<T>, void> Read(TSource & src, T & i)
|
||||
{
|
||||
i = ReadVarUint<T>(src);
|
||||
}
|
||||
|
||||
// Writes a signed integer to |sink| with WriteVarInt().
template <typename T, typename TSink>
std::enable_if_t<std::is_integral_v<T> && std::is_signed_v<T>, void> Write(TSink & sink, T i)
{
  WriteVarInt(sink, i);
}
|
||||
|
||||
template <class T, class TSource>
|
||||
std::enable_if_t<std::is_integral_v<T> && std::is_signed_v<T>, void> Read(TSource & src, T & i)
|
||||
{
|
||||
i = ReadVarInt<T>(src);
|
||||
}
|
||||
|
||||
// Writes a string as its length (32-bit, var-uint encoded) followed by the raw characters.
template <typename TSink>
void Write(TSink & sink, std::string const & s)
{
  auto const length = static_cast<uint32_t>(s.size());
  WriteVarUint(sink, length);

  // Nothing follows the length of an empty string.
  if (s.empty())
    return;

  sink.Write(s.data(), length);
}
|
||||
|
||||
template <class TSource>
|
||||
void Read(TSource & src, std::string & s)
|
||||
{
|
||||
uint32_t const count = ReadVarUint<uint32_t>(src);
|
||||
s.resize(count);
|
||||
if (count > 0)
|
||||
src.Read(&s[0], count);
|
||||
}
|
||||
|
||||
namespace impl
|
||||
{
|
||||
template <class TSink, class TCont>
|
||||
void WriteCont(TSink & sink, TCont const & v)
|
||||
{
|
||||
uint32_t const count = static_cast<uint32_t>(v.size());
|
||||
WriteVarUint(sink, count);
|
||||
for (uint32_t i = 0; i < count; ++i)
|
||||
Write(sink, v[i]);
|
||||
}
|
||||
|
||||
template <class TSource, class TCont>
|
||||
void ReadCont(TSource & src, TCont & v)
|
||||
{
|
||||
uint32_t const count = ReadVarUint<uint32_t>(src);
|
||||
v.resize(count);
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
Read(src, v[i]);
|
||||
}
|
||||
} // namespace impl
|
||||
|
||||
template <class TSink, class T>
|
||||
void Write(TSink & sink, std::vector<T> const & v)
|
||||
{
|
||||
impl::WriteCont(sink, v);
|
||||
}
|
||||
|
||||
template <class TSource, class T>
|
||||
void Read(TSource & src, std::vector<T> & v)
|
||||
{
|
||||
impl::ReadCont(src, v);
|
||||
}
|
||||
|
||||
template <class TSink, class T, size_t N>
|
||||
void Write(TSink & sink, buffer_vector<T, N> const & v)
|
||||
{
|
||||
impl::WriteCont(sink, v);
|
||||
}
|
||||
|
||||
template <class TSource, class T, size_t N>
|
||||
void Read(TSource & src, buffer_vector<T, N> & v)
|
||||
{
|
||||
impl::ReadCont(src, v);
|
||||
}
|
||||
|
||||
// Writes the object representation of |value| (sizeof(T) bytes) to |sink| as is.
// Only trivially copyable types are accepted.
template <typename Sink, typename T>
void WritePOD(Sink & sink, T const & value)
{
  static_assert(std::is_trivially_copyable_v<T>, "");

  T const * const rawBytes = &value;
  sink.Write(rawBytes, sizeof(T));
}
|
||||
|
||||
// Fills |value| with sizeof(T) bytes taken from |src| as is.
// Only trivially copyable types are accepted.
template <typename Sink, typename T>
void ReadPOD(Sink & src, T & value)
{
  static_assert(std::is_trivially_copyable_v<T>, "");

  T * const rawBytes = &value;
  src.Read(rawBytes, sizeof(T));
}
|
||||
|
||||
template <class TSource, class TCont>
|
||||
void ReadVectorOfPOD(TSource & src, TCont & v)
|
||||
{
|
||||
typedef typename TCont::value_type ValueT;
|
||||
/// This assert fails on std::pair<int, int> and OsmID class because std::pair is not trivially copyable:
|
||||
/// std::pair has a non-trivial copy-assignment and move-assignment operator.
|
||||
// static_assert(std::is_trivially_copyable_v<ValueT>);
|
||||
|
||||
uint32_t const count = ReadVarUint<uint32_t>(src);
|
||||
if (count > 0)
|
||||
{
|
||||
v.resize(count);
|
||||
src.Read(&v[0], count * sizeof(ValueT));
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the number of elements of |v| (32-bit, var-uint encoded) followed by
// the raw bytes of all elements taken as one contiguous block.
template <typename TSink, typename TCont>
void WriteVectorOfPOD(TSink & sink, TCont const & v)
{
  using ValueT = typename TCont::value_type;
  /// This assert fails on std::pair<int, int> and OsmID class because std::pair is not trivially copyable:
  /// std::pair has a non-trivial copy-assignment and move-assignment operator.
  // static_assert(std::is_trivially_copyable_v<ValueT>);

  auto const count = static_cast<uint32_t>(v.size());
  WriteVarUint(sink, count);

  // Only the counter is stored for an empty container.
  if (count == 0)
    return;

  sink.Write(&v[0], count * sizeof(ValueT));
}
|
||||
|
||||
// Copies reader.Size() bytes from |reader| to |writer| in chunks of at most |bufferSize| bytes.
// A zero |bufferSize| falls back to the default chunk size instead of looping forever.
template <class ReaderT, class WriterT>
void ReadAndWrite(ReaderT & reader, WriterT & writer, size_t bufferSize = 4 * 1024)
{
  size_t constexpr kDefaultBufferSize = 4 * 1024;

  // Sizes are compared as 64-bit values. Narrowing the remaining size to size_t before the
  // comparison could truncate it to zero on 32-bit targets and stall the loop.
  uint64_t const chunkLimit = bufferSize != 0 ? bufferSize : kDefaultBufferSize;

  uint64_t size = reader.Size();
  // The result of std::min never exceeds |chunkLimit|, so it always fits into size_t.
  std::vector<char> buffer(static_cast<size_t>(std::min(chunkLimit, size)));

  while (size > 0)
  {
    size_t const curr = static_cast<size_t>(std::min(chunkLimit, size));

    reader.Read(buffer.data(), curr);
    writer.Write(buffer.data(), curr);

    size -= curr;
  }
}
|
||||
} // namespace rw
|
||||
8
libs/coding/reader.cpp
Normal file
8
libs/coding/reader.cpp
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
#include "coding/reader.hpp"
|
||||
|
||||
void Reader::ReadAsString(std::string & s) const
|
||||
{
|
||||
s.clear();
|
||||
s.resize(static_cast<size_t>(Size()));
|
||||
Read(0 /* pos */, s.data(), s.size());
|
||||
}
|
||||
283
libs/coding/reader.hpp
Normal file
283
libs/coding/reader.hpp
Normal file
|
|
@ -0,0 +1,283 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/endianness.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/exception.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
// Base class for random-access Reader. Not thread-safe.
|
||||
class Reader
|
||||
{
|
||||
public:
|
||||
DECLARE_EXCEPTION(Exception, RootException);
|
||||
DECLARE_EXCEPTION(OpenException, Exception);
|
||||
DECLARE_EXCEPTION(SizeException, Exception);
|
||||
DECLARE_EXCEPTION(ReadException, Exception);
|
||||
DECLARE_EXCEPTION(TooManyFilesException, Exception);
|
||||
|
||||
virtual ~Reader() = default;
|
||||
virtual uint64_t Size() const = 0;
|
||||
virtual void Read(uint64_t pos, void * p, size_t size) const = 0;
|
||||
virtual std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const = 0;
|
||||
|
||||
void ReadAsString(std::string & s) const;
|
||||
};
|
||||
|
||||
// Reader from memory.
|
||||
template <bool WithExceptions>
|
||||
class MemReaderTemplate : public Reader
|
||||
{
|
||||
public:
|
||||
// Construct from block of memory.
|
||||
MemReaderTemplate(void const * pData, size_t size) : m_pData(static_cast<char const *>(pData)), m_size(size) {}
|
||||
|
||||
explicit MemReaderTemplate(std::string_view data) : m_pData{data.data()}, m_size{data.size()} {}
|
||||
|
||||
uint64_t Size() const override { return m_size; }
|
||||
|
||||
void Read(uint64_t pos, void * p, size_t size) const override
|
||||
{
|
||||
AssertPosAndSize(pos, size);
|
||||
memcpy(p, m_pData + pos, size);
|
||||
}
|
||||
|
||||
MemReaderTemplate SubReader(uint64_t pos, uint64_t size) const
|
||||
{
|
||||
AssertPosAndSize(pos, size);
|
||||
return MemReaderTemplate(m_pData + pos, static_cast<size_t>(size));
|
||||
}
|
||||
|
||||
std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const override
|
||||
{
|
||||
AssertPosAndSize(pos, size);
|
||||
return std::make_unique<MemReaderTemplate>(m_pData + pos, static_cast<size_t>(size));
|
||||
}
|
||||
|
||||
private:
|
||||
bool GoodPosAndSize(uint64_t pos, uint64_t size) const
|
||||
{
|
||||
// In case of 32-bit system, when sizeof(size_t) == 4.
|
||||
return (pos + size <= Size() && size <= std::numeric_limits<size_t>::max());
|
||||
}
|
||||
|
||||
void AssertPosAndSize(uint64_t pos, uint64_t size) const
|
||||
{
|
||||
if constexpr (WithExceptions)
|
||||
{
|
||||
if (!GoodPosAndSize(pos, size))
|
||||
MYTHROW(Reader::SizeException, (pos, size, Size()));
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT(GoodPosAndSize(pos, size), (pos, size, Size()));
|
||||
}
|
||||
}
|
||||
|
||||
char const * m_pData;
|
||||
size_t m_size;
|
||||
};
|
||||
|
||||
using MemReader = MemReaderTemplate<false>;
|
||||
using MemReaderWithExceptions = MemReaderTemplate<true>;
|
||||
|
||||
// Reader wrapper to hold the pointer to a polymorphic reader.
|
||||
// Common use: ReaderSource<ReaderPtr<Reader> >.
|
||||
// Note! It takes the ownership of Reader.
|
||||
template <class TReader>
|
||||
class ReaderPtr
|
||||
{
|
||||
protected:
|
||||
std::shared_ptr<TReader> m_p;
|
||||
|
||||
public:
|
||||
template <typename TReaderDerived>
|
||||
ReaderPtr(std::unique_ptr<TReaderDerived> p) : m_p(std::move(p))
|
||||
{}
|
||||
|
||||
uint64_t Size() const { return m_p->Size(); }
|
||||
|
||||
void Read(uint64_t pos, void * p, size_t size) const { m_p->Read(pos, p, size); }
|
||||
|
||||
void ReadAsString(std::string & s) const { m_p->ReadAsString(s); }
|
||||
|
||||
ReaderPtr<Reader> SubReader(uint64_t pos, uint64_t size) const { return {m_p->CreateSubReader(pos, size)}; }
|
||||
|
||||
TReader * GetPtr() const { return m_p.get(); }
|
||||
};
|
||||
|
||||
// Model reader store file id as string.
|
||||
class ModelReader : public Reader
|
||||
{
|
||||
std::string m_name;
|
||||
|
||||
public:
|
||||
explicit ModelReader(std::string const & name) : m_name(name) {}
|
||||
|
||||
std::string const & GetName() const { return m_name; }
|
||||
};
|
||||
|
||||
// Reader pointer class for data files.
|
||||
class ModelReaderPtr : public ReaderPtr<ModelReader>
|
||||
{
|
||||
using TBase = ReaderPtr<ModelReader>;
|
||||
|
||||
public:
|
||||
template <typename TReaderDerived>
|
||||
ModelReaderPtr(std::unique_ptr<TReaderDerived> p) : TBase(std::move(p))
|
||||
{}
|
||||
|
||||
ModelReaderPtr SubReader(uint64_t pos, uint64_t size) const
|
||||
{
|
||||
return std::unique_ptr<ModelReader>(static_cast<ModelReader *>(m_p->CreateSubReader(pos, size).release()));
|
||||
}
|
||||
|
||||
std::string const & GetName() const { return m_p->GetName(); }
|
||||
};
|
||||
|
||||
/// Source that reads from a reader and holds Reader by non-owning reference.
|
||||
/// No templates here allows to hide Deserialization functions in cpp.
|
||||
class NonOwningReaderSource
|
||||
{
|
||||
public:
|
||||
/// @note Reader shouldn't change it's size during the source's lifetime.
|
||||
explicit NonOwningReaderSource(Reader const & reader) : m_reader(reader), m_pos(0), m_end(reader.Size()) {}
|
||||
|
||||
NonOwningReaderSource(Reader const & reader, uint64_t pos, uint64_t end) : m_reader(reader), m_pos(pos), m_end(end) {}
|
||||
|
||||
void Read(void * p, size_t size)
|
||||
{
|
||||
m_reader.Read(m_pos, p, size);
|
||||
m_pos += size;
|
||||
CheckPosition();
|
||||
}
|
||||
|
||||
void Skip(uint64_t size)
|
||||
{
|
||||
m_pos += size;
|
||||
CheckPosition();
|
||||
}
|
||||
|
||||
uint64_t Pos() const { return m_pos; }
|
||||
|
||||
uint64_t Size() const
|
||||
{
|
||||
CheckPosition();
|
||||
return m_end - m_pos;
|
||||
}
|
||||
|
||||
void SetPosition(uint64_t pos)
|
||||
{
|
||||
m_pos = pos;
|
||||
CheckPosition();
|
||||
}
|
||||
|
||||
private:
|
||||
void CheckPosition() const { ASSERT_LESS_OR_EQUAL(m_pos, m_end, ()); }
|
||||
|
||||
Reader const & m_reader;
|
||||
uint64_t m_pos, m_end;
|
||||
};
|
||||
|
||||
/// Source that reads from a reader and holds Reader by value.
|
||||
template <typename TReader>
|
||||
class ReaderSource
|
||||
{
|
||||
public:
|
||||
using ReaderType = TReader;
|
||||
|
||||
ReaderSource(TReader const & reader) : m_reader(reader), m_pos(0) {}
|
||||
|
||||
void Read(void * p, size_t size)
|
||||
{
|
||||
m_reader.Read(m_pos, p, size);
|
||||
m_pos += size;
|
||||
}
|
||||
|
||||
void Skip(uint64_t size)
|
||||
{
|
||||
m_pos += size;
|
||||
ASSERT(AssertPosition(), ());
|
||||
}
|
||||
|
||||
uint64_t Pos() const { return m_pos; }
|
||||
|
||||
uint64_t Size() const
|
||||
{
|
||||
ASSERT(AssertPosition(), ());
|
||||
return (m_reader.Size() - m_pos);
|
||||
}
|
||||
|
||||
/// @todo We can avoid calling virtual Reader::SubReader and creating unique_ptr here
|
||||
/// by simply making "ReaderSource ReaderSource::SubSource(pos, end)" and storing "ReaderSource::m_end"
|
||||
/// like I did in NonOwningReaderSource. Unfortunately, it needs a lot of efforts in refactoring.
|
||||
/// @{
|
||||
TReader SubReader(uint64_t size)
|
||||
{
|
||||
uint64_t const pos = m_pos;
|
||||
Skip(size);
|
||||
return m_reader.SubReader(pos, size);
|
||||
}
|
||||
|
||||
TReader SubReader() { return SubReader(Size()); }
|
||||
|
||||
std::unique_ptr<Reader> CreateSubReader(uint64_t size)
|
||||
{
|
||||
uint64_t const pos = m_pos;
|
||||
Skip(size);
|
||||
return m_reader.CreateSubReader(pos, size);
|
||||
}
|
||||
|
||||
std::unique_ptr<Reader> CreateSubReader() { return CreateSubReader(Size()); }
|
||||
/// @}
|
||||
|
||||
private:
|
||||
bool AssertPosition() const
|
||||
{
|
||||
bool const ret = (m_pos <= m_reader.Size());
|
||||
ASSERT(ret, (m_pos, m_reader.Size()));
|
||||
return ret;
|
||||
}
|
||||
|
||||
TReader m_reader;
|
||||
uint64_t m_pos;
|
||||
};
|
||||
|
||||
// Reads |size| bytes located at offset |pos| of |reader| into the buffer |p|.
template <typename TReader>
inline void ReadFromPos(TReader const & reader, uint64_t pos, void * p, size_t size)
{
  reader.Read(pos, p, size);
}
|
||||
|
||||
// Reads a trivially copyable value located at offset |pos| of |reader|.
// The raw bytes are passed through SwapIfBigEndianMacroBased() before being returned.
template <typename TPrimitive, class TReader>
inline TPrimitive ReadPrimitiveFromPos(TReader const & reader, uint64_t pos)
{
  static_assert(std::is_trivially_copyable_v<TPrimitive>);

  TPrimitive raw;
  ReadFromPos(reader, pos, &raw, sizeof(raw));
  return SwapIfBigEndianMacroBased(raw);
}
|
||||
|
||||
// Reads a trivially copyable value from the current position of |source|.
// The raw bytes are passed through SwapIfBigEndianMacroBased() before being returned.
template <typename TPrimitive, class TSource>
TPrimitive ReadPrimitiveFromSource(TSource & source)
{
  static_assert(std::is_trivially_copyable_v<TPrimitive>);

  TPrimitive raw;
  source.Read(&raw, sizeof(raw));
  return SwapIfBigEndianMacroBased(raw);
}
|
||||
|
||||
// Out-parameter flavour of ReadPrimitiveFromSource(): stores the value read in |primitive|.
template <typename TPrimitive, typename TSource>
void ReadPrimitiveFromSource(TSource & source, TPrimitive & primitive)
{
  TPrimitive const value = ReadPrimitiveFromSource<TPrimitive, TSource>(source);
  primitive = value;
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue