Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 13:58:55 +01:00
parent 4af19165ec
commit 68073add76
12458 changed files with 12350765 additions and 2 deletions

107
libs/coding/CMakeLists.txt Normal file
View file

@ -0,0 +1,107 @@
# Build script for the "coding" library target.
project(coding)
# Sources and headers that make up the library.
set(SRC
base64.cpp
base64.hpp
bit_streams.hpp
buffer_reader.hpp
buffered_file_writer.cpp
buffered_file_writer.hpp
bwt.cpp
bwt.hpp
bwt_coder.hpp
byte_stream.hpp
compressed_bit_vector.cpp
compressed_bit_vector.hpp
constants.hpp
csv_reader.cpp
csv_reader.hpp
dd_vector.hpp
diff.hpp
elias_coder.hpp
endianness.hpp
file_reader.cpp
file_reader.hpp
file_sort.hpp
file_writer.cpp
file_writer.hpp
files_container.cpp
files_container.hpp
fixed_bits_ddvector.hpp
geometry_coding.cpp
geometry_coding.hpp
hex.cpp
hex.hpp
huffman.cpp
huffman.hpp
internal/file64_api.hpp
internal/file_data.cpp
internal/file_data.hpp
internal/xmlparser.hpp
map_uint32_to_val.hpp
memory_region.hpp
mmap_reader.cpp
mmap_reader.hpp
move_to_front.cpp
move_to_front.hpp
parse_xml.hpp
point_coding.cpp
point_coding.hpp
read_write_utils.hpp
reader.cpp
reader.hpp
reader_cache.hpp
reader_streambuf.cpp
reader_streambuf.hpp
reader_wrapper.hpp
reader_writer_ops.cpp
reader_writer_ops.hpp
serdes_binary_header.hpp
serdes_json.hpp
sha1.cpp
sha1.hpp
simple_dense_coding.cpp
simple_dense_coding.hpp
sparse_vector.hpp
streams.hpp
streams_common.hpp
streams_sink.hpp
string_utf8_multilang.cpp
string_utf8_multilang.hpp
succinct_mapper.hpp
tesselator_decl.hpp
text_storage.hpp
traffic.cpp
traffic.hpp
transliteration.cpp
transliteration.hpp
url.cpp
url.hpp
value_opt_string.hpp
var_record_reader.hpp
var_serial_vector.hpp
varint.hpp
write_to_sink.hpp
writer.hpp
zip_creator.cpp
zip_creator.hpp
zip_reader.cpp
zip_reader.hpp
zlib.cpp
zlib.hpp
)
# omim_add_library is a project-defined helper that is not visible here;
# presumably it wraps add_library() with project-wide settings.
omim_add_library(${PROJECT_NAME} ${SRC})
# Libraries the target links against.
target_link_libraries(${PROJECT_NAME}
base
expat::expat
cppjansson
succinct
ICU::uc
ICU::i18n # For transliteration.
minizip
ZLIB::ZLIB
)
# Adds the coding_tests subdirectory through a project-defined helper.
omim_add_test_subdirectory(coding_tests)

41
libs/coding/base64.cpp Normal file
View file

@ -0,0 +1,41 @@
#include "coding/base64.hpp"
// Suppress selected warnings while the Boost headers below are included, and
// restore the previous diagnostic state afterwards.
// NOTE(review): clang also defines __GNUC__, so the __clang__ branches look
// unreachable when compiling with clang — confirm whether the order of the
// checks should be swapped.
#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wreorder"
#elif defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-local-typedef"
#endif
#include <boost/algorithm/string.hpp>
#include <boost/archive/iterators/base64_from_binary.hpp>
#include <boost/archive/iterators/binary_from_base64.hpp>
#include <boost/archive/iterators/transform_width.hpp>
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#elif defined(__clang__)
#pragma clang diagnostic pop
#endif
namespace base64
{
// From: http://stackoverflow.com/a/28471421
std::string Decode(std::string const & val)
{
using namespace boost::archive::iterators;
using It = transform_width<binary_from_base64<std::string::const_iterator>, 8, 6>;
return boost::algorithm::trim_right_copy_if(std::string(It(std::begin(val)), It(std::end(val))),
[](char c) { return c == '\0'; });
}
std::string Encode(std::string_view val)
{
using namespace boost::archive::iterators;
using It = base64_from_binary<transform_width<std::string_view::const_iterator, 6, 8>>;
auto tmp = std::string(It(std::begin(val)), It(std::end(val)));
return tmp.append((3 - val.size() % 3) % 3, '=');
}
} // namespace base64

9
libs/coding/base64.hpp Normal file
View file

@ -0,0 +1,9 @@
#pragma once
#include <string>
#include <string_view>  // Encode() takes std::string_view; do not rely on <string> pulling it in.
namespace base64
{
// Returns the base64 representation of |bytesToEncode|, including '=' padding.
std::string Encode(std::string_view bytesToEncode);
// Returns the bytes represented by the base64 text |base64CharsToDecode|.
std::string Decode(std::string const & base64CharsToDecode);
}  // namespace base64

217
libs/coding/bit_streams.hpp Normal file
View file

@ -0,0 +1,217 @@
#pragma once
#include "base/assert.hpp"
#include "base/bits.hpp"
#include "base/logging.hpp"
#include <algorithm>
#include <climits>
#include <cstdint>
static_assert(CHAR_BIT == 8);
// Packs bit sequences into bytes and forwards every completed byte to the
// underlying |writer| through TWriter::Write(ptr, 1). Bits that do not yet
// fill a byte are kept in m_buf; the destructor writes that partial byte.
template <typename TWriter>
class BitWriter
{
static uint8_t constexpr kMinBits = CHAR_BIT;
public:
explicit BitWriter(TWriter & writer) : m_writer(writer), m_buf(0), m_bitsWritten(0) {}
// Flushes the pending partial byte. Exceptions thrown while flushing are
// logged and swallowed so that they do not escape the destructor.
~BitWriter()
{
try
{
Flush();
}
catch (...)
{
LOG(LWARNING, ("Caught an exception when flushing BitWriter."));
}
}
// Returns the number of bits that have been sent to BitWriter,
// including those that are in m_buf and are possibly not flushed
// yet.
uint64_t BitsWritten() const { return m_bitsWritten; }
// Writes n bits starting with the least significant bit. They are
// written one byte at a time so endianness is of no concern.
// |n| must not exceed CHAR_BIT; bits of |bits| above the n-th are ignored.
void Write(uint8_t bits, uint8_t n)
{
if (n == 0)
return;
bits = bits & bits::GetFullMask(n);
ASSERT_LESS_OR_EQUAL(n, CHAR_BIT, ());
// Number of bits currently waiting in m_buf.
uint32_t bufferedBits = m_bitsWritten % CHAR_BIT;
m_bitsWritten += n;
if (n + bufferedBits > CHAR_BIT)
{
// The new bits overflow the pending byte: emit one full byte and keep the
// remaining high bits of |bits| as the new pending byte.
uint8_t b = (bits << bufferedBits) | m_buf;
m_writer.Write(&b, 1);
m_buf = bits >> (CHAR_BIT - bufferedBits);
}
else
{
if (bufferedBits > 0)
{
bits = (bits << bufferedBits) | m_buf;
n += bufferedBits;
}
// Exactly one byte is complete: emit it and leave nothing pending.
if (n == CHAR_BIT)
{
m_writer.Write(&bits, 1);
bits = 0;
}
m_buf = bits;
}
}
// WRITE_BYTE writes the lowest min(n, kMinBits) bits of |bits|, returns from
// the enclosing function when nothing is left, and otherwise drops the
// written bits from |bits| and |n|.
#define WRITE_BYTE() \
{ \
Write(bits, std::min(kMinBits, n)); \
if (n <= kMinBits) \
return; \
n -= kMinBits; \
bits >>= kMinBits; \
}
// Same as Write but accept up to 32 bits to write.
void WriteAtMost32Bits(uint32_t bits, uint8_t n)
{
ASSERT_LESS_OR_EQUAL(n, 32, ());
WRITE_BYTE();
WRITE_BYTE();
WRITE_BYTE();
Write(bits, n);
}
// Same as Write but accept up to 64 bits to write.
void WriteAtMost64Bits(uint64_t bits, uint8_t n)
{
ASSERT_LESS_OR_EQUAL(n, 64, ());
WRITE_BYTE();
WRITE_BYTE();
WRITE_BYTE();
WRITE_BYTE();
WRITE_BYTE();
WRITE_BYTE();
WRITE_BYTE();
Write(bits, n);
}
#undef WRITE_BYTE
private:
// Writes up to CHAR_BIT-1 last bits if they have not been written
// yet and pads them with zeros. This method cannot be made public
// because once a byte has been flushed there is no going back.
void Flush()
{
if (m_bitsWritten % CHAR_BIT != 0)
m_writer.Write(&m_buf, 1);
}
TWriter & m_writer;
// Pending bits that do not yet form a full byte (low bits first).
uint8_t m_buf;
// Total number of bits passed to Write so far.
uint64_t m_bitsWritten;
};
// Reads bit sequences from a byte-oriented source. Bytes are fetched from
// |src| with TSource::Read(ptr, 1); bits of a fetched byte that were not
// consumed yet are kept in m_buf.
template <typename TSource>
class BitReader
{
static uint8_t constexpr kMinBits = CHAR_BIT;
public:
explicit BitReader(TSource & src) : m_src(src), m_bitsRead(0), m_bufferedBits(0), m_buf(0) {}
// Returns the total number of bits read from this BitReader.
uint64_t BitsRead() const { return m_bitsRead; }
// Reads n bits and returns them as the least significant bits of an
// 8-bit number. The underlying m_src is supposed to be
// byte-aligned (which is the case when it reads from the place that
// was written to using BitWriter). Read may use one lookahead
// byte.
uint8_t Read(uint8_t n)
{
if (n == 0)
return 0;
uint8_t constexpr kByteMask = 0xFF;
ASSERT_LESS_OR_EQUAL(n, CHAR_BIT, ());
m_bitsRead += n;
uint8_t result = 0;
if (n <= m_bufferedBits)
{
// Enough bits are already buffered: take the low n bits of m_buf.
result = m_buf & (kByteMask >> (CHAR_BIT - n));
m_bufferedBits -= n;
m_buf >>= n;
}
else
{
// Fetch one more byte; its low |low| bits complete the result and the
// rest becomes the new buffer.
uint8_t nextByte;
m_src.Read(&nextByte, 1);
uint32_t low = n - m_bufferedBits;
result = ((nextByte & (kByteMask >> (CHAR_BIT - low))) << m_bufferedBits) | m_buf;
m_buf = nextByte >> low;
m_bufferedBits = CHAR_BIT - low;
}
return result;
}
// READ_BYTE(i) reads min(n, kMinBits) bits into byte position |i| of
// |result|, returns |result| from the enclosing function when nothing is
// left to read, and otherwise decreases |n| by kMinBits.
#define READ_BYTE(i) \
{ \
result = result | (static_cast<decltype(result)>(Read(std::min(n, kMinBits))) << (i * kMinBits)); \
if (n <= kMinBits) \
return result; \
n -= kMinBits; \
}
// Same as Read but accept up to 32 bits to read.
uint32_t ReadAtMost32Bits(uint8_t n)
{
ASSERT_LESS_OR_EQUAL(n, 32, ());
uint32_t result = 0;
READ_BYTE(0);
READ_BYTE(1);
READ_BYTE(2);
return result | (static_cast<uint32_t>(Read(n)) << (3 * kMinBits));
}
// Same as Read but accept up to 64 bits to read.
uint64_t ReadAtMost64Bits(uint8_t n)
{
ASSERT_LESS_OR_EQUAL(n, 64, ());
uint64_t result = 0;
READ_BYTE(0);
READ_BYTE(1);
READ_BYTE(2);
READ_BYTE(3);
READ_BYTE(4);
READ_BYTE(5);
READ_BYTE(6);
return result | (static_cast<uint64_t>(Read(n)) << (7 * kMinBits));
}
#undef READ_BYTE
private:
TSource & m_src;
// Total number of bits returned by Read so far.
uint64_t m_bitsRead;
// Number of valid bits currently held in m_buf.
uint32_t m_bufferedBits;
// Unconsumed bits of the last fetched byte (low bits first).
uint8_t m_buf;
};

View file

@ -0,0 +1,70 @@
#pragma once
#include "coding/reader.hpp"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
/// Reader over an in-memory buffer. The bytes are held through a shared_ptr,
/// so copies and sub-readers share one allocation instead of duplicating it.
class BufferReader : public Reader
{
public:
  /// Copies the bytes of |reader| from |offset| up to its end into a new buffer.
  template <class ReaderT>
  explicit BufferReader(ReaderT const & reader, uint64_t offset = 0)
  {
    uint64_t const rSize = reader.Size();
    ASSERT_LESS_OR_EQUAL(offset, rSize, (offset, rSize));

    auto const count = static_cast<size_t>(rSize - offset);
    InitBuffer(count);
    reader.Read(offset, m_data.get(), count);
  }

  /// Copies |count| bytes starting at |p| into a new buffer.
  explicit BufferReader(char const * p, size_t count)
  {
    InitBuffer(count);
    memcpy(m_data.get(), p, count);
  }

  /// Number of bytes visible through this reader.
  uint64_t Size() const { return m_size; }

  /// Copies |size| bytes starting at position |pos| of this reader into |p|.
  void Read(uint64_t pos, void * p, size_t size) const
  {
    ASSERT_LESS_OR_EQUAL(pos + size, Size(), (pos, size));
    char const * const from = m_data.get() + m_offset + static_cast<size_t>(pos);
    memcpy(p, from, size);
  }

  /// Returns a reader over [pos, pos + size) that shares this reader's buffer.
  BufferReader SubReader(uint64_t pos, uint64_t size) const { return BufferReader(*this, pos, size); }

  /// Same as SubReader, but returns a heap-allocated Reader.
  std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const
  {
    // Can't use make_unique with private constructor.
    return std::unique_ptr<Reader>(new BufferReader(*this, pos, size));
  }

private:
  /// Array deleter for the shared buffer, which is allocated with new[].
  struct Deleter
  {
    void operator()(char * p) { delete[] p; }
  };

  /// Builds a window [pos, pos + size) into |src| without copying any bytes.
  BufferReader(BufferReader const & src, uint64_t pos, uint64_t size)
    : m_offset(static_cast<size_t>(src.m_offset + pos))
    , m_size(static_cast<size_t>(size))
    , m_data(src.m_data)
  {
    ASSERT_LESS_OR_EQUAL(pos + size, src.Size(), (pos, size));
  }

  /// Allocates a fresh buffer of |count| bytes and resets the window to cover it.
  void InitBuffer(size_t count)
  {
    m_offset = 0;
    m_size = count;
    m_data = std::shared_ptr<char>(new char[count], Deleter());
  }

  size_t m_offset, m_size;
  std::shared_ptr<char> m_data;
};

View file

@ -0,0 +1,75 @@
#include "coding/buffered_file_writer.hpp"
#include "coding/internal/file_data.hpp"
#include "base/assert.hpp"
// Opens |fileName| through FileWriter and reserves |bufferSize| bytes for the
// staging buffer. |bufferSize| must be greater than zero.
BufferedFileWriter::BufferedFileWriter(std::string const & fileName, Op operation /* = OP_WRITE_TRUNCATE */,
size_t bufferSize /* = 4096 */)
: FileWriter(fileName, operation)
{
CHECK_GREATER(bufferSize, 0, ());
m_buf.reserve(bufferSize);
}
// Writes any staged bytes before the object goes away. Declared
// noexcept(false), so an exception from the final write can propagate.
BufferedFileWriter::~BufferedFileWriter() noexcept(false)
{
DropBuffer();
}
// Writes out staged bytes first so that they land at the old position, then
// delegates the seek to FileWriter.
void BufferedFileWriter::Seek(uint64_t pos)
{
DropBuffer();
FileWriter::Seek(pos);
}
// Returns FileWriter::Pos() plus the number of bytes still staged in m_buf.
uint64_t BufferedFileWriter::Pos() const
{
return FileWriter::Pos() + m_buf.size();
}
// Appends |size| bytes starting at |p|. Data is staged in m_buf and handed to
// FileWriter::Write in chunks of m_buf.capacity() bytes; whatever is left over
// stays staged until the next DropBuffer().
void BufferedFileWriter::Write(void const * p, size_t size)
{
  // Need to use pointer arithmetic.
  auto src = static_cast<uint8_t const *>(p);

  while (size >= m_buf.capacity() - m_buf.size())
  {
    if (m_buf.empty())
    {
      // Nothing is staged: a full chunk can bypass the buffer entirely.
      FileWriter::Write(src, m_buf.capacity());
      src += m_buf.capacity();
      size -= m_buf.capacity();
    }
    else
    {
      // Top the buffer up to its capacity with one bulk append, then write it out.
      auto const copyCount = m_buf.capacity() - m_buf.size();
      m_buf.insert(m_buf.end(), src, src + copyCount);
      DropBuffer();
      src += copyCount;
      size -= copyCount;
    }
  }

  // The tail is strictly smaller than the free space, so this never reallocates.
  m_buf.insert(m_buf.end(), src, src + size);
}
// Returns FileWriter::Size() plus the number of bytes still staged in m_buf.
uint64_t BufferedFileWriter::Size() const
{
return FileWriter::Size() + m_buf.size();
}
// Hands staged bytes to FileWriter::Write and then calls FileWriter::Flush().
void BufferedFileWriter::Flush()
{
DropBuffer();
FileWriter::Flush();
}
// Passes the staged bytes, if any, to FileWriter::Write and empties the
// buffer. The buffer's capacity is left as it is.
void BufferedFileWriter::DropBuffer()
{
  if (!m_buf.empty())
  {
    FileWriter::Write(m_buf.data(), m_buf.size());
    m_buf.clear();
  }
}

View file

@ -0,0 +1,29 @@
#pragma once
#include "coding/file_writer.hpp"
#include <cstdint>
#include <string>
#include <vector>
// FileWriter that stages written bytes in an in-memory buffer and passes them
// on to FileWriter in larger pieces.
class BufferedFileWriter : public FileWriter
{
public:
// |bufferSize| is the number of bytes reserved for the staging buffer.
explicit BufferedFileWriter(std::string const & fileName, Op operation = OP_WRITE_TRUNCATE, size_t bufferSize = 4096);
// May throw: the destructor is declared noexcept(false).
~BufferedFileWriter() noexcept(false) override;
// Writer overrides:
void Seek(uint64_t pos) override;
uint64_t Pos() const override;
void Write(void const * p, size_t size) override;
// FileWriter overrides:
uint64_t Size() const override;
void Flush() override;
private:
// Writes the staged bytes to the underlying FileWriter and clears the buffer.
void DropBuffer();
// Bytes accepted by Write() that were not yet passed to FileWriter.
std::vector<uint8_t> m_buf;
};

193
libs/coding/bwt.cpp Normal file
View file

@ -0,0 +1,193 @@
#include "coding/bwt.hpp"
#include "base/assert.hpp"
#include "base/suffix_array.hpp"
#include <algorithm>
#include <array>
#include <limits>
#include <vector>
namespace
{
// Number of distinct byte values; used as the size of per-byte tables.
size_t constexpr kNumBytes = 256;
// Fake trailing '$' for the BWT, used for original string
// reconstruction.
uint32_t constexpr kEOS = 256;
// FirstColumn represents the first column in the BWT matrix. As
// during reverse BWT we need to reconstruct canonical first column,
// with '$' as the first element, this wrapper is used. Also note that
// other characters in the first column are sorted, so we actually
// don't need to store them explicitly, it's enough to store start
// positions of the corresponding groups of consecutive characters.
class FirstColumn
{
public:
FirstColumn(size_t n, uint8_t const * s) : m_n(n), m_starts({})
{
m_starts.fill(0);
for (size_t i = 0; i < n; ++i)
++m_starts[s[i]];
size_t offset = 0;
for (size_t i = 0; i < m_starts.size(); ++i)
{
auto const count = m_starts[i];
m_starts[i] = offset;
offset += count;
}
}
size_t Size() const { return m_n + 1; }
uint32_t operator[](size_t i) const
{
ASSERT_LESS(i, Size(), ());
if (i == 0)
return kEOS;
--i;
auto it = std::upper_bound(m_starts.begin(), m_starts.end(), i);
ASSERT(it != m_starts.begin(), ());
--it;
return static_cast<uint32_t>(std::distance(m_starts.begin(), it));
}
// Returns the rank of the i-th symbol among symbols with the same
// value.
size_t Rank(size_t i) const
{
ASSERT_LESS(i, Size(), ());
if (i == 0)
return 0;
--i;
auto it = std::upper_bound(m_starts.begin(), m_starts.end(), i);
if (it == m_starts.begin())
return i;
--it;
return i - *it;
}
private:
size_t const m_n;
std::array<size_t, kNumBytes> m_starts;
};
// LastColumn models the last column of the canonical BWT matrix. It is a view
// over |s| that behaves like s[start] + s[0, start) + '$' + s(start, n), and
// it precomputes, for every byte value, the positions where it occurs.
class LastColumn
{
public:
  LastColumn(size_t n, size_t start, uint8_t const * s) : m_n(n), m_start(start), m_s(s)
  {
    for (size_t i = 0; i < Size(); ++i)
    {
      auto const b = (*this)[i];
      if (b != kEOS)
      {
        ASSERT_LESS(b, kNumBytes, ());
        m_table[b].push_back(i);
      }
    }
  }

  // Length of the column including '$'.
  size_t Size() const { return m_n + 1; }

  // Returns the symbol at position |i| of the canonical last column.
  uint32_t operator[](size_t i) const
  {
    if (i == 0)
    {
      ASSERT_LESS(m_start, m_n, ());
      return m_s[m_start];
    }
    else if (i == m_start + 1)
    {
      return kEOS;
    }
    else
    {
      ASSERT_LESS_OR_EQUAL(i, m_n, ());
      return m_s[i - 1];
    }
  }

  // Returns the index of the |rank|-th |byte| in the canonical BWT
  // last column.
  size_t Select(uint32_t byte, size_t rank)
  {
    if (byte != kEOS)
    {
      ASSERT_LESS(rank, m_table[byte].size(), (byte, rank));
      return m_table[byte][rank];
    }

    ASSERT_EQUAL(rank, 0, ());
    return 0;
  }

private:
  size_t const m_n;
  size_t const m_start;
  uint8_t const * const m_s;
  // m_table[b] lists, in increasing order, the positions of byte |b|.
  std::array<std::vector<size_t>, kNumBytes> m_table;
};
} // namespace
namespace coding
{
// Writes the BWT of the |n| bytes at |s| into |r| (which must hold |n| bytes)
// and returns the position of the original string among the sorted rotations.
size_t BWT(size_t n, uint8_t const * s, uint8_t * r)
{
  std::vector<size_t> sa(n);
  base::Skew(n, s, sa.data());

  size_t result = 0;
  for (size_t i = 0; i < n; ++i)
  {
    size_t const suffix = sa[i];
    if (suffix == 0)
      result = i;
    // The output byte is the one preceding the suffix, wrapping around to the
    // last byte for the suffix that starts at position 0.
    r[i] = s[(suffix == 0 ? n : suffix) - 1];
  }
  return result;
}
// String overload: resizes |r| to s.size() and forwards to the pointer-based BWT.
size_t BWT(std::string const & s, std::string & r)
{
  auto const n = s.size();
  r.assign(n, '\0');

  auto const * src = reinterpret_cast<uint8_t const *>(s.data());
  auto * dst = reinterpret_cast<uint8_t *>(&r[0]);
  return BWT(n, src, dst);
}
// Restores the original |n| bytes into |r| from the transformed bytes |s| and
// the index |start| returned by BWT. Does nothing when |n| is zero.
void RevBWT(size_t n, size_t start, uint8_t const * s, uint8_t * r)
{
  if (n == 0)
    return;

  FirstColumn first(n, s);
  LastColumn last(n, start, s);

  // Follow the first-column -> last-column mapping, emitting one byte per step.
  auto curr = start + 1;
  for (size_t i = 0; i < n; ++i)
  {
    ASSERT_LESS(curr, first.Size(), ());
    ASSERT(first[curr] != kEOS, ());

    r[i] = first[curr];
    auto const rank = first.Rank(curr);
    curr = last.Select(r[i], rank);
  }

  ASSERT_EQUAL(first[curr], kEOS, ());
}
// String overload: resizes |r| to s.size() and forwards to the pointer-based RevBWT.
void RevBWT(size_t start, std::string const & s, std::string & r)
{
  auto const n = s.size();
  r.assign(n, '\0');

  auto const * src = reinterpret_cast<uint8_t const *>(s.data());
  auto * dst = reinterpret_cast<uint8_t *>(&r[0]);
  RevBWT(n, start, src, dst);
}
} // namespace coding

60
libs/coding/bwt.hpp Normal file
View file

@ -0,0 +1,60 @@
#pragma once
#include <cstdint>
#include <string>
namespace coding
{
// Computes the Burrows-Wheeler transform of the string |s| and stores the
// result in |r|. Note - the size of |r| must be |n|.
// Returns the index of the original string among all sorted
// rotations of |s|.
//
// *NOTE* in contrast to popular explanations of BWT, we do not append
// to |s| a trailing '$' that is less than any other character in |s|.
// The reason is that |s| can be an arbitrary byte string, with zero
// bytes inside, so implementing this trailing '$' is expensive
// and, actually, not needed.
//
// For example, if |s| is "abaaba", the canonical BWT is:
//
//   Sorted rotations:   canonical BWT:
//     $abaaba             a
//     a$abaab             b
//     aaba$ab             b
//     aba$aba             a
//   * abaaba$             $
//     ba$abaa             a
//     baaba$a             a
//
// where '*' denotes the original string.
//
// Our implementation sorts rotations as if there were an
// implicit '$' that is less than any other byte in |s|, but does not
// return this '$'. Therefore, the order of rotations is the same
// as above, without the first '$abaaba':
//
//   Sorted rotations:   our BWT:
//     aabaab              b
//     aabaab              b
//     abaaba              a
//   * abaaba              a
//     baabaa              a
//     baabaa              a
//
// where '*' denotes the index of the original string. As one can see,
// there are two 'abaaba' strings, but as mentioned, rotations are
// sorted as if there were an implicit '$' at the end of the original
// string. It's possible to get from "our BWT" to the "canonical BWT",
// see the code for details.
//
// Complexity: O(n) time and O(n) memory.
size_t BWT(size_t n, uint8_t const * s, uint8_t * r);
size_t BWT(std::string const & s, std::string & r);
// Inverse Burrows-Wheeler transform. |start| is the index returned by BWT,
// |s| is the transformed string and |r| receives the original string.
//
// Complexity: O(n) time and O(n) memory.
void RevBWT(size_t n, size_t start, uint8_t const * s, uint8_t * r);
void RevBWT(size_t start, std::string const & s, std::string & r);
} // namespace coding

125
libs/coding/bwt_coder.hpp Normal file
View file

@ -0,0 +1,125 @@
#pragma once
#include "coding/bwt.hpp"
#include "coding/huffman.hpp"
#include "coding/move_to_front.hpp"
#include "coding/varint.hpp"
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <limits>
#include <string>
#include <vector>
namespace coding
{
// Block-based coder. Every block is run through the Burrows-Wheeler
// transform, then a move-to-front transform, and is finally Huffman-coded.
class BWTCoder
{
public:
  using BufferT = std::vector<uint8_t>;

  struct Params
  {
    // Maximum number of input bytes per encoded block.
    size_t m_blockSize = 32000;
  };

  // Encodes the |n| bytes at |s| as a single block and writes it to |sink| as:
  // varuint BWT start index, Huffman encoding table, Huffman-coded payload.
  // |bwtBuffer| is scratch space that callers may reuse between blocks.
  template <typename Sink>
  static void EncodeAndWriteBlock(Sink & sink, size_t n, uint8_t const * s, BufferT & bwtBuffer)
  {
    bwtBuffer.resize(n);
    auto const start = BWT(n, s, bwtBuffer.data());

    MoveToFront mtf;
    for (auto & b : bwtBuffer)
      b = mtf.Transform(b);

    WriteVarUint(sink, start);

    HuffmanCoder huffman;
    huffman.Init(bwtBuffer.begin(), bwtBuffer.end());
    huffman.WriteEncoding(sink);
    huffman.EncodeAndWrite(sink, bwtBuffer.begin(), bwtBuffer.end());
  }

  // Same as above, with a temporary scratch buffer.
  template <typename Sink>
  static void EncodeAndWriteBlock(Sink & sink, size_t n, uint8_t const * s)
  {
    BufferT bwtBuffer;
    EncodeAndWriteBlock(sink, n, s, bwtBuffer);
  }

  // Same as above, for the bytes of |s|.
  template <typename Sink>
  static void EncodeAndWriteBlock(Sink & sink, std::string const & s)
  {
    EncodeAndWriteBlock(sink, s.size(), reinterpret_cast<uint8_t const *>(s.data()));
  }

  // Splits the |n| bytes at |s| into blocks of at most params.m_blockSize
  // bytes and writes the block count (varuint) followed by every block.
  template <typename Sink>
  static void EncodeAndWrite(Params const & params, Sink & sink, size_t n, uint8_t const * s)
  {
    CHECK(params.m_blockSize != 0, ());
    // Guards the rounding-up division below against size_t overflow.
    CHECK_GREATER(n + params.m_blockSize, n, ());

    BufferT bwtBuffer;

    size_t const numBlocks = (n + params.m_blockSize - 1) / params.m_blockSize;
    WriteVarUint(sink, numBlocks);

    for (size_t i = 0; i < n; i += params.m_blockSize)
    {
      auto const m = std::min(n - i, params.m_blockSize);
      EncodeAndWriteBlock(sink, m, s + i, bwtBuffer);
    }
  }

  // Reads one block from |source| and stores the decoded bytes in |revBuffer|.
  // |bwtBuffer| is scratch space for the intermediate BWT data.
  template <typename Source>
  static void ReadAndDecodeBlock(Source & source, BufferT & bwtBuffer, BufferT & revBuffer)
  {
    auto const start = ReadVarUint<uint64_t, Source>(source);

    HuffmanCoder huffman;
    huffman.ReadEncoding(source);

    bwtBuffer.clear();
    huffman.ReadAndDecode(source, std::back_inserter(bwtBuffer));

    size_t const n = bwtBuffer.size();

    // Undo the move-to-front step in place: look the byte up by its code and
    // then replay the transform so that |mtf| tracks the encoder's state.
    MoveToFront mtf;
    for (size_t i = 0; i < n; ++i)
    {
      auto const b = mtf[bwtBuffer[i]];
      bwtBuffer[i] = b;
      mtf.Transform(b);
    }

    // For an empty block the start index is not checked.
    if (n != 0)
    {
      CHECK_LESS(start, n, ());
    }

    revBuffer.resize(n);
    RevBWT(n, static_cast<size_t>(start), bwtBuffer.data(), revBuffer.data());
  }

  // Reads one block from |source| and returns the decoded bytes.
  template <typename Source>
  static BufferT ReadAndDecodeBlock(Source & source)
  {
    BufferT bwtBuffer, revBuffer;
    ReadAndDecodeBlock(source, bwtBuffer, revBuffer);
    return revBuffer;
  }

  // Reads the block count and then every block from |source|, writing the
  // decoded bytes through |it|. Returns the iterator past the last written byte.
  template <typename Source, typename OutIt>
  static OutIt ReadAndDecode(Source & source, OutIt it)
  {
    auto const numBlocks = ReadVarUint<uint64_t, Source>(source);
    CHECK_LESS(numBlocks, std::numeric_limits<size_t>::max(), ());

    BufferT bwtBuffer, revBuffer;

    for (size_t i = 0; i < static_cast<size_t>(numBlocks); ++i)
    {
      ReadAndDecodeBlock(source, bwtBuffer, revBuffer);
      // Keep the advanced iterator. Otherwise every block would be written at
      // the same position for iterators that are not self-advancing inserters.
      it = std::copy(revBuffer.begin(), revBuffer.end(), it);
    }
    return it;
  }
};
} // namespace coding

View file

@ -0,0 +1,59 @@
#pragma once
#include "base/base.hpp"
#include <cstddef>
#include <cstdint>
#include <cstring>
// Sequential reader over a raw byte array. It only tracks the current
// position; the end of the array is not known, so no bounds are checked.
class ArrayByteSource
{
public:
  explicit ArrayByteSource(void const * p) : m_p(static_cast<uint8_t const *>(p)) {}

  // Returns the byte at the current position and moves one byte forward.
  uint8_t ReadByte()
  {
    uint8_t const byte = *m_p;
    ++m_p;
    return byte;
  }

  // Copies |size| bytes from the current position into |ptr| and moves past them.
  void Read(void * ptr, size_t size)
  {
    memcpy(ptr, m_p, size);
    Advance(size);
  }

  // Current position as an untyped pointer.
  void const * Ptr() const { return m_p; }
  // Current position as a byte pointer.
  uint8_t const * PtrUint8() const { return m_p; }

  // Skips |size| bytes.
  void Advance(size_t size) { m_p += size; }

private:
  uint8_t const * m_p;
};
// Sink that appends every written byte to the end of a container.
// StorageT needs insert(pos, first, last), end() and size().
template <class StorageT>
class PushBackByteSink
{
public:
  explicit PushBackByteSink(StorageT & storage) : m_Storage(storage) {}

  // Appends the |size| bytes starting at |p| to the storage.
  void Write(void const * p, size_t size)
  {
    // Treat the input as a plain sequence of bytes.
    auto const * first = static_cast<uint8_t const *>(p);
    auto const * last = first + size;
    m_Storage.insert(m_Storage.end(), first, last);
  }

  // Current size of the storage, i.e. the position of the next write.
  size_t Pos() const { return m_Storage.size(); }

private:
  StorageT & m_Storage;
};
// Sink that discards the data and only adds up how many bytes were written.
class CountingSink
{
public:
  CountingSink() = default;

  // Adds |size| to the running total; the data pointer is ignored.
  void Write(void const *, size_t size) { m_Count += size; }

  // Total number of bytes passed to Write so far.
  size_t GetCount() const { return m_Count; }

private:
  size_t m_Count = 0;
};

View file

@ -0,0 +1,60 @@
# Build script for the unit tests of the "coding" library.
project(coding_tests)
# Test sources and their shared helper headers.
set(SRC
base64_test.cpp
bit_streams_test.cpp
bwt_coder_tests.cpp
bwt_tests.cpp
compressed_bit_vector_test.cpp
csv_reader_test.cpp
dd_vector_test.cpp
diff_test.cpp
elias_coder_test.cpp
endianness_test.cpp
file_data_test.cpp
file_sort_test.cpp
files_container_tests.cpp
fixed_bits_ddvector_test.cpp
geometry_coding_test.cpp
hex_test.cpp
huffman_test.cpp
map_uint32_to_val_tests.cpp
mem_file_reader_test.cpp
mem_file_writer_test.cpp
move_to_front_tests.cpp
png_decoder_test.cpp
point_coding_tests.cpp
reader_cache_test.cpp
reader_test.cpp
reader_test.hpp
reader_writer_ops_test.cpp
serdes_json_test.cpp
simple_dense_coding_test.cpp
sha1_test.cpp
sparse_vector_tests.cpp
string_utf8_multilang_tests.cpp
succinct_ef_test.cpp
succinct_mapper_test.cpp
test_polylines.cpp
test_polylines.hpp
text_storage_tests.cpp
traffic_test.cpp
url_tests.cpp
value_opt_string_test.cpp
var_record_reader_test.cpp
var_serial_vector_test.cpp
varint_test.cpp
writer_test.cpp
xml_parser_tests.cpp
zip_creator_test.cpp
zip_reader_test.cpp
zlib_test.cpp
)
# omim_add_test is a project-defined helper that is not visible here;
# presumably it creates the test executable.
omim_add_test(${PROJECT_NAME} ${SRC})
# Libraries the test target links against.
target_link_libraries(${PROJECT_NAME}
platform_tests_support # For csv_reader_test (TODO: Move ScopedFile into a base header)
geometry # For traffic_test and serdes_json_test (TODO: Remove dependency)
coding
)

View file

@ -0,0 +1,28 @@
#include "testing/testing.hpp"
#include "coding/base64.hpp"
using namespace base64;
// Checks Encode and Decode against known pairs, covering inputs of every
// length modulo 3 (and therefore 0, 1 and 2 padding characters), and checks
// that both directions round-trip.
UNIT_TEST(Base64_Smoke)
{
char const * bytes[] = {"H", "He", "Hel", "Hell", "Hello", "Hello,", "Hello, ", "Hello, World!"};
char const * encoded[] = {
"SA==", "SGU=", "SGVs", "SGVsbA==", "SGVsbG8=", "SGVsbG8s", "SGVsbG8sIA==", "SGVsbG8sIFdvcmxkIQ=="};
TEST_EQUAL(ARRAY_SIZE(bytes), ARRAY_SIZE(encoded), ());
for (size_t i = 0; i < ARRAY_SIZE(bytes); ++i)
{
TEST_EQUAL(Encode(bytes[i]), encoded[i], ());
TEST_EQUAL(Decode(encoded[i]), bytes[i], ());
TEST_EQUAL(Decode(Encode(bytes[i])), bytes[i], ());
TEST_EQUAL(Encode(Decode(encoded[i])), encoded[i], ());
}
// A longer sentence as an additional known pair.
char const * str = "MapsWithMe is the offline maps application for any device in the world.";
char const * encStr =
"TWFwc1dpdGhNZSBpcyB0aGUgb2ZmbGluZSBtYXBzIGFwcGxpY2F0aW9uIGZvciBhbnkgZGV2aWNlIGluIHRoZSB3b3JsZC4=";
TEST_EQUAL(Encode(str), encStr, ());
TEST_EQUAL(Decode(encStr), str, ());
}

View file

@ -0,0 +1,105 @@
#include "testing/testing.hpp"
#include "coding/bit_streams.hpp"
#include "coding/reader.hpp"
#include "coding/writer.hpp"
#include "base/assert.hpp"
#include "base/bits.hpp"
#include <cstddef>
#include <cstdint>
#include <random>
#include <utility>
#include <vector>
using namespace std;
namespace
{
// Writes 200 pseudo-random values of varying bit width through BitWriter and
// checks that BitReader returns the same values in the same order.
UNIT_TEST(BitStreams_Smoke)
{
uniform_int_distribution<uint32_t> randomBytesDistribution(0, 255);
mt19937 rng(0);
vector<pair<uint8_t, uint32_t>> nums;
// 100 values that are 0 to 7 bits wide; the shift keeps only |numBits| bits.
for (size_t i = 0; i < 100; ++i)
{
uint32_t numBits = randomBytesDistribution(rng) % 8;
uint8_t num = static_cast<uint8_t>(randomBytesDistribution(rng) >> (CHAR_BIT - numBits));
nums.push_back(make_pair(num, numBits));
}
// 100 full-byte values.
for (size_t i = 0; i < 100; ++i)
{
uint32_t numBits = 8;
uint8_t num = static_cast<uint8_t>(randomBytesDistribution(rng));
nums.push_back(make_pair(num, numBits));
}
vector<uint8_t> encodedBits;
{
// The scope ends before reading so that BitWriter's destructor has flushed.
MemWriter<vector<uint8_t>> encodedBitsWriter(encodedBits);
BitWriter<MemWriter<vector<uint8_t>>> bitSink(encodedBitsWriter);
for (size_t i = 0; i < nums.size(); ++i)
bitSink.Write(nums[i].first, nums[i].second);
}
MemReader encodedBitsReader(encodedBits.data(), encodedBits.size());
ReaderSource<MemReader> src(encodedBitsReader);
BitReader<ReaderSource<MemReader>> bitsSource(src);
for (size_t i = 0; i < nums.size(); ++i)
{
uint8_t num = bitsSource.Read(nums[i].second);
TEST_EQUAL(num, nums[i].first, (i));
}
}
// Writes 3 + 3 + 3 = 9 bits and expects two bytes in the output once the
// writer has been destroyed.
UNIT_TEST(BitStreams_T1)
{
using TBuffer = vector<uint8_t>;
using TWriter = MemWriter<TBuffer>;
TBuffer buf;
{
TWriter w(buf);
BitWriter<TWriter> bits(w);
bits.Write(0, 3);
bits.Write(3, 3);
bits.Write(6, 3);
}
TEST_EQUAL(buf.size(), 2, ());
}
// Round-trips the low i bits of a fixed 64-bit pattern for every i in
// [0, 64], using the 32-bit calls for i <= 32 and the 64-bit calls otherwise.
UNIT_TEST(BitStreams_Large)
{
using TBuffer = vector<uint8_t>;
using TWriter = MemWriter<TBuffer>;
uint64_t const kMask = 0x0123456789abcdef;
TBuffer buf;
{
TWriter w(buf);
BitWriter<TWriter> bits(w);
for (int i = 0; i <= 64; ++i)
if (i <= 32)
bits.WriteAtMost32Bits(static_cast<uint32_t>(kMask), i);
else
bits.WriteAtMost64Bits(kMask, i);
}
{
MemReader r(buf.data(), buf.size());
ReaderSource<MemReader> src(r);
BitReader<ReaderSource<MemReader>> bits(src);
for (int i = 0; i <= 64; ++i)
{
// Only the low i bits of the pattern are expected back.
uint64_t const mask = bits::GetFullMask(i);
uint64_t const value = i <= 32 ? bits.ReadAtMost32Bits(i) : bits.ReadAtMost64Bits(i);
TEST_EQUAL(value, kMask & mask, (i));
}
}
}
} // namespace

View file

@ -0,0 +1,107 @@
#include "testing/testing.hpp"
#include "coding/bwt_coder.hpp"
#include "coding/reader.hpp"
#include "coding/writer.hpp"
#include <algorithm>
#include <iterator>
#include <random>
#include <string>
using namespace coding;
using namespace std;
namespace
{
// Encodes |s| with BWTCoder::EncodeAndWrite using |params|, decodes the
// produced bytes with BWTCoder::ReadAndDecode and returns the decoded string.
string EncodeDecode(BWTCoder::Params const & params, string const & s)
{
  vector<uint8_t> encoded;
  {
    // Scoped so that the writer is destroyed before |encoded| is read.
    MemWriter<vector<uint8_t>> sink(encoded);
    auto const * bytes = reinterpret_cast<uint8_t const *>(s.data());
    BWTCoder::EncodeAndWrite(params, sink, s.size(), bytes);
  }

  string decoded;
  {
    MemReader reader(encoded.data(), encoded.size());
    ReaderSource<MemReader> source(reader);
    BWTCoder::ReadAndDecode(source, back_inserter(decoded));
  }
  return decoded;
}
string EncodeDecodeBlock(string const & s)
{
vector<uint8_t> data;
{
MemWriter<decltype(data)> sink(data);
BWTCoder::EncodeAndWriteBlock(sink, s.size(), reinterpret_cast<uint8_t const *>(s.data()));
}
string result;
{
MemReader reader(data.data(), data.size());
ReaderSource<MemReader> source(reader);
auto const buffer = BWTCoder::ReadAndDecodeBlock(source);
result.assign(buffer.begin(), buffer.end());
}
return result;
}
// Round-trips short strings through the block and the multi-block API,
// trying every block size from 1 to 99 for "abracadabra".
UNIT_TEST(BWTEncoder_Smoke)
{
for (size_t blockSize = 1; blockSize < 100; ++blockSize)
{
BWTCoder::Params params;
params.m_blockSize = blockSize;
string const s = "abracadabra";
TEST_EQUAL(s, EncodeDecodeBlock(s), ());
TEST_EQUAL(s, EncodeDecode(params, s), (blockSize));
}
// Default parameters, including the empty string.
string const strings[] = {"", "mississippi", "again and again and again"};
for (auto const & s : strings)
{
TEST_EQUAL(s, EncodeDecodeBlock(s), ());
TEST_EQUAL(s, EncodeDecode(BWTCoder::Params{}, s), ());
}
}
// Round-trips "mississippi" repeated 10000 times with the default parameters.
UNIT_TEST(BWT_Large)
{
string s;
for (size_t i = 0; i < 10000; ++i)
s += "mississippi";
TEST_EQUAL(s, EncodeDecode(BWTCoder::Params{}, s), ());
}
// Round-trips a shuffled string in which every byte value 0..255 occurs
// between kMin and kMax times. The engine is seeded, so the input is the same
// on every run with the same standard library.
UNIT_TEST(BWT_AllBytes)
{
int kSeed = 42;
int kMin = 1;
int kMax = 1000;
mt19937 engine(kSeed);
uniform_int_distribution<int> uid(kMin, kMax);
string s;
for (size_t i = 0; i < 256; ++i)
{
auto const count = uid(engine);
ASSERT_GREATER_OR_EQUAL(count, kMin, ());
ASSERT_LESS_OR_EQUAL(count, kMax, ());
for (int j = 0; j < count; ++j)
s.push_back(static_cast<uint8_t>(i));
}
shuffle(s.begin(), s.end(), engine);
TEST_EQUAL(s, EncodeDecode(BWTCoder::Params{}, s), ());
}
} // namespace

View file

@ -0,0 +1,90 @@
#include "testing/testing.hpp"
#include "coding/bwt.hpp"
#include <algorithm>
#include <random>
#include <string>
using namespace coding;
using namespace std;
namespace
{
// Applies BWT to |s| and then RevBWT to the result; returns what RevBWT produced.
string RevRevBWT(string const & s)
{
  string transformed;
  auto const start = BWT(s, transformed);

  string restored;
  RevBWT(start, transformed, restored);
  return restored;
}
// Checks BWT on empty input and on two strings with known results.
UNIT_TEST(BWT_Smoke)
{
{
// Empty input through the pointer-based overload.
TEST_EQUAL(BWT(0 /* n */, nullptr /* s */, nullptr /* r */), 0, ());
}
{
// Empty input through the string overload.
string r;
TEST_EQUAL(BWT(string() /* s */, r /* r */), 0, ());
}
{
// All rotations are equal; the expected index of the original is 5.
string const s = "aaaaaa";
string r;
TEST_EQUAL(BWT(s, r), 5, ());
TEST_EQUAL(r, s, ());
}
{
string const s = "mississippi";
string r;
TEST_EQUAL(BWT(s, r), 4, ());
TEST_EQUAL(r, "pssmipissii", ());
}
}
// Checks that RevBWT(BWT(s)) == s for a few fixed strings and for strings of
// lengths 0..99 made of a single repeated character.
UNIT_TEST(RevBWT_Smoke)
{
string const strings[] = {"abaaba", "mississippi", "a b b", "Again and again and again"};
for (auto const & s : strings)
TEST_EQUAL(s, RevRevBWT(s), ());
// Strings consisting only of zero bytes.
for (size_t i = 0; i < 100; ++i)
{
string const s(i, '\0');
TEST_EQUAL(s, RevRevBWT(s), ());
}
// Strings consisting of one repeated letter: 'a', 'b' or 'c'.
for (size_t i = 0; i < 100; ++i)
{
string const s(i, 'a' + (i % 3));
TEST_EQUAL(s, RevRevBWT(s), ());
}
}
// Checks that RevBWT(BWT(s)) == s for a shuffled string in which every byte
// value 0..255 occurs between kMin and kMax times.
UNIT_TEST(RevBWT_AllBytes)
{
int kSeed = 42;
int kMin = 1;
int kMax = 10;
mt19937 engine(kSeed);
uniform_int_distribution<int> uid(kMin, kMax);
string s;
for (size_t i = 0; i < 256; ++i)
{
auto const count = uid(engine);
ASSERT_GREATER_OR_EQUAL(count, kMin, ());
ASSERT_LESS_OR_EQUAL(count, kMax, ());
for (int j = 0; j < count; ++j)
s.push_back(static_cast<uint8_t>(i));
}
shuffle(s.begin(), s.end(), engine);
TEST_EQUAL(s, RevRevBWT(s), ());
}
} // namespace

View file

@ -0,0 +1,455 @@
#include "testing/testing.hpp"
#include "coding/compressed_bit_vector.hpp"
#include "coding/writer.hpp"
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <memory>
#include <set>
#include <vector>
using namespace std;
namespace
{
// Sorts both inputs in place and appends their set intersection to |result|.
void Intersect(std::vector<uint64_t> & setBits1, std::vector<uint64_t> & setBits2, std::vector<uint64_t> & result)
{
  std::sort(std::begin(setBits1), std::end(setBits1));
  std::sort(std::begin(setBits2), std::end(setBits2));

  std::set_intersection(std::begin(setBits1), std::end(setBits1), std::begin(setBits2), std::end(setBits2),
                        std::back_inserter(result));
}
// Sorts both inputs in place and appends the elements of |setBits1| that are
// not in |setBits2| to |result|.
void Subtract(std::vector<uint64_t> & setBits1, std::vector<uint64_t> & setBits2, std::vector<uint64_t> & result)
{
  std::sort(std::begin(setBits1), std::end(setBits1));
  std::sort(std::begin(setBits2), std::end(setBits2));

  std::set_difference(std::begin(setBits1), std::end(setBits1), std::begin(setBits2), std::end(setBits2),
                      std::back_inserter(result));
}
// Sorts both inputs in place and appends their set union to |result|.
void Union(std::vector<uint64_t> & setBits1, std::vector<uint64_t> & setBits2, std::vector<uint64_t> & result)
{
  std::sort(std::begin(setBits1), std::end(setBits1));
  std::sort(std::begin(setBits2), std::end(setBits2));

  std::set_union(std::begin(setBits1), std::end(setBits1), std::begin(setBits2), std::end(setBits2),
                 std::back_inserter(result));
}
// Computes the expected result with |op| (which sorts both inputs in place)
// and checks that |cbv| has the same number of set bits and that every
// expected bit is set in it.
template <typename TBinaryOp>
void CheckBinaryOp(TBinaryOp op, vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
coding::CompressedBitVector const & cbv)
{
vector<uint64_t> expected;
op(setBits1, setBits2, expected);
TEST_EQUAL(expected.size(), cbv.PopCount(), ());
for (size_t i = 0; i < expected.size(); ++i)
TEST(cbv.GetBit(expected[i]), ());
}
// Checks that |cbv| holds the intersection of the two bit-position sets.
void CheckIntersection(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
coding::CompressedBitVector const & cbv)
{
CheckBinaryOp(&Intersect, setBits1, setBits2, cbv);
}
// Checks that |cbv| holds the positions of |setBits1| that are not in |setBits2|.
void CheckSubtraction(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2, coding::CompressedBitVector const & cbv)
{
CheckBinaryOp(&Subtract, setBits1, setBits2, cbv);
}
// Checks that |cbv| holds the union of the two bit-position sets.
void CheckUnion(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2, coding::CompressedBitVector const & cbv)
{
CheckBinaryOp(&Union, setBits1, setBits2, cbv);
}
// Builds a bit vector from each position list, checks that the builder chose |strategy1| and
// |strategy2| for them, and then checks both the storage strategy and the contents of their union.
void CheckUnion(vector<uint64_t> & setBits1, coding::CompressedBitVector::StorageStrategy strategy1,
                vector<uint64_t> & setBits2, coding::CompressedBitVector::StorageStrategy strategy2,
                coding::CompressedBitVector::StorageStrategy resultStrategy)
{
  using coding::CompressedBitVector;
  using coding::CompressedBitVectorBuilder;

  auto cbv1 = CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  TEST_EQUAL(strategy1, cbv1->GetStorageStrategy(), ());
  TEST_EQUAL(strategy2, cbv2->GetStorageStrategy(), ());

  auto cbv3 = CompressedBitVector::Union(*cbv1, *cbv2);
  TEST(cbv3.get(), ());
  TEST_EQUAL(resultStrategy, cbv3->GetStorageStrategy(), ());

  // Resolves to the three-argument overload, which verifies the contents of the result.
  CheckUnion(setBits1, setBits2, *cbv3);
}
} // namespace
// Intersects [1, kNumBits) with [0, kNumBits - 1); the result [1, kNumBits - 1) is expected to
// use dense storage.
UNIT_TEST(CompressedBitVector_Intersect1)
{
  size_t const kNumBits = 100;
  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i > 0)
      setBits1.push_back(i);
    if (i + 1 < kNumBits)
      setBits2.push_back(i);
  }

  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  // Check the result before dereferencing it, as the Subtract and Union tests do.
  TEST(cbv3.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, *cbv3);
}
// Intersects [0, kNumBits / 2] with [kNumBits / 2, kNumBits); the two ranges share only the
// middle position, and the result is expected to use sparse storage.
UNIT_TEST(CompressedBitVector_Intersect2)
{
  size_t const kNumBits = 100;
  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i <= kNumBits / 2)
      setBits1.push_back(i);
    if (i >= kNumBits / 2)
      setBits2.push_back(i);
  }

  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  // Check the result before dereferencing it, as the Subtract and Union tests do.
  TEST(cbv3.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, *cbv3);
}
// Intersects the multiples of 2 with the multiples of 3 below kNumBits; the result (the
// multiples of 6) is expected to use sparse storage.
UNIT_TEST(CompressedBitVector_Intersect3)
{
  size_t const kNumBits = 100;
  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i % 2 == 0)
      setBits1.push_back(i);
    if (i % 3 == 0)
      setBits2.push_back(i);
  }

  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  // Check the result before dereferencing it, as the Subtract and Union tests do.
  TEST(cbv3.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, *cbv3);
}
// Intersects the multiples of 100 with the multiples of 150 below kNumBits; the result (the
// multiples of 300) is expected to use sparse storage.
UNIT_TEST(CompressedBitVector_Intersect4)
{
  size_t const kNumBits = 1000;
  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i % 100 == 0)
      setBits1.push_back(i);
    if (i % 150 == 0)
      setBits2.push_back(i);
  }

  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  // Check the result before dereferencing it, as the Subtract and Union tests do.
  TEST(cbv3.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, *cbv3);
}
// Dense minus dense: [0, 6] \ [1, 7] leaves only position 0; the result is expected to be dense.
UNIT_TEST(CompressedBitVector_Subtract1)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1{0, 1, 2, 3, 4, 5, 6};
  vector<uint64_t> setBits2{1, 2, 3, 4, 5, 6, 7};

  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());
  TEST_EQUAL(Strategy::Dense, cbv1->GetStorageStrategy(), ());
  TEST_EQUAL(Strategy::Dense, cbv2->GetStorageStrategy(), ());

  auto cbv3 = coding::CompressedBitVector::Subtract(*cbv1, *cbv2);
  TEST(cbv3.get(), ());
  TEST_EQUAL(Strategy::Dense, cbv3->GetStorageStrategy(), ());
  CheckSubtraction(setBits1, setBits2, *cbv3);
}
// Dense minus sparse: [0, 100) without positions 9 and 14; the result is expected to be dense.
UNIT_TEST(CompressedBitVector_Subtract2)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1;
  for (uint64_t pos = 0; pos != 100; ++pos)
    setBits1.push_back(pos);
  vector<uint64_t> setBits2{9, 14};

  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());
  TEST_EQUAL(Strategy::Dense, cbv1->GetStorageStrategy(), ());
  TEST_EQUAL(Strategy::Sparse, cbv2->GetStorageStrategy(), ());

  auto cbv3 = coding::CompressedBitVector::Subtract(*cbv1, *cbv2);
  TEST(cbv3.get(), ());
  TEST_EQUAL(Strategy::Dense, cbv3->GetStorageStrategy(), ());
  CheckSubtraction(setBits1, setBits2, *cbv3);
}
// Sparse minus dense: {0, 9} \ [0, 9] is empty; the result is expected to be sparse.
UNIT_TEST(CompressedBitVector_Subtract3)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1{0, 9};
  vector<uint64_t> setBits2{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());
  TEST_EQUAL(Strategy::Sparse, cbv1->GetStorageStrategy(), ());
  TEST_EQUAL(Strategy::Dense, cbv2->GetStorageStrategy(), ());

  auto cbv3 = coding::CompressedBitVector::Subtract(*cbv1, *cbv2);
  TEST(cbv3.get(), ());
  TEST_EQUAL(Strategy::Sparse, cbv3->GetStorageStrategy(), ());
  CheckSubtraction(setBits1, setBits2, *cbv3);
}
// Sparse minus sparse: {0, 5, 15} \ {0, 10} leaves {5, 15}; the result is expected to be sparse.
UNIT_TEST(CompressedBitVector_Subtract4)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1{0, 5, 15};
  vector<uint64_t> setBits2{0, 10};

  auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());
  TEST_EQUAL(Strategy::Sparse, cbv1->GetStorageStrategy(), ());
  TEST_EQUAL(Strategy::Sparse, cbv2->GetStorageStrategy(), ());

  auto cbv3 = coding::CompressedBitVector::Subtract(*cbv1, *cbv2);
  TEST(cbv3.get(), ());
  TEST_EQUAL(Strategy::Sparse, cbv3->GetStorageStrategy(), ());
  CheckSubtraction(setBits1, setBits2, *cbv3);
}
// Union of two empty vectors: inputs and result are all expected to be sparse.
UNIT_TEST(CompressedBitVector_Union_Smoke)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2;
  CheckUnion(setBits1, Strategy::Sparse /* strategy1 */, setBits2, Strategy::Sparse /* strategy2 */,
             Strategy::Sparse /* resultStrategy */);
}
// Union of an empty (sparse) vector with the dense range [0, 9]; the result is expected to be dense.
UNIT_TEST(CompressedBitVector_Union1)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1;
  vector<uint64_t> setBits2{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  CheckUnion(setBits1, Strategy::Sparse /* strategy1 */, setBits2, Strategy::Dense /* strategy2 */,
             Strategy::Dense /* resultStrategy */);
}
// Union of two sparse vectors with widely spaced positions; the result is expected to be sparse.
UNIT_TEST(CompressedBitVector_Union2)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1{256, 1024};
  vector<uint64_t> setBits2{0, 32, 64};
  CheckUnion(setBits1, Strategy::Sparse /* strategy1 */, setBits2, Strategy::Sparse /* strategy2 */,
             Strategy::Sparse /* resultStrategy */);
}
// Union of the dense range [0, 6] with the dense range [0, 256); the result is expected to be dense.
UNIT_TEST(CompressedBitVector_Union3)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1{0, 1, 2, 3, 4, 5, 6};
  vector<uint64_t> setBits2;
  for (uint64_t pos = 0; pos != 256; ++pos)
    setBits2.push_back(pos);

  CheckUnion(setBits1, Strategy::Dense /* strategy1 */, setBits2, Strategy::Dense /* strategy2 */,
             Strategy::Dense /* resultStrategy */);
}
// Union of one fully set dense block with a single very distant position; the result is
// expected to be sparse.
UNIT_TEST(CompressedBitVector_Union4)
{
  using Strategy = coding::CompressedBitVector::StorageStrategy;

  vector<uint64_t> setBits1;
  for (uint64_t pos = 0; pos < coding::DenseCBV::kBlockSize; ++pos)
    setBits1.push_back(pos);
  vector<uint64_t> setBits2{1000000000};

  CheckUnion(setBits1, Strategy::Dense /* strategy1 */, setBits2, Strategy::Sparse /* strategy2 */,
             Strategy::Sparse /* resultStrategy */);
}
// Serializes a dense vector covering [0, kNumBits) into a memory buffer, reads it back and
// checks that the storage strategy, the population count and every set bit survive the round trip.
UNIT_TEST(CompressedBitVector_SerializationDense)
{
  // size_t, so the loop below does not compare signed with unsigned values.
  size_t const kNumBits = 100;
  vector<uint64_t> setBits;
  for (size_t i = 0; i < kNumBits; ++i)
    setBits.push_back(i);

  vector<uint8_t> buf;
  {
    MemWriter<vector<uint8_t>> writer(buf);
    auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits);
    // Check the builder result before dereferencing it.
    TEST(cbv.get(), ());
    TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
    TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv->GetStorageStrategy(), ());
    cbv->Serialize(writer);
  }

  MemReader reader(buf.data(), buf.size());
  auto cbv = coding::CompressedBitVectorBuilder::DeserializeFromReader(reader);
  TEST(cbv.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv->GetStorageStrategy(), ());
  TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
  for (uint64_t const pos : setBits)
    TEST(cbv->GetBit(pos), ());
}
// Serializes a sparse vector holding the multiples of 10 below kNumBits into a memory buffer,
// reads it back and checks that the storage strategy, the population count and every set bit
// survive the round trip.
UNIT_TEST(CompressedBitVector_SerializationSparse)
{
  // size_t, so the loop below does not compare signed with unsigned values.
  size_t const kNumBits = 100;
  vector<uint64_t> setBits;
  for (size_t i = 0; i < kNumBits; ++i)
  {
    if (i % 10 == 0)
      setBits.push_back(i);
  }

  vector<uint8_t> buf;
  {
    MemWriter<vector<uint8_t>> writer(buf);
    auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits);
    // Check the builder result before dereferencing it.
    TEST(cbv.get(), ());
    TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv->GetStorageStrategy(), ());
    cbv->Serialize(writer);
  }

  MemReader reader(buf.data(), buf.size());
  auto cbv = coding::CompressedBitVectorBuilder::DeserializeFromReader(reader);
  TEST(cbv.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv->GetStorageStrategy(), ());
  TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
  for (uint64_t const pos : setBits)
    TEST(cbv->GetBit(pos), ());
}
// Enumerates the set bits of a dense and of a sparse vector with
// CompressedBitVectorEnumerator::ForEach. For the dense vector the test checks the number of
// distinct positions and the largest one; for the sparse vector it checks that every reported
// position is a multiple of 15.
UNIT_TEST(CompressedBitVector_ForEach)
{
  // Unsigned, so it can be compared with sizes and bit positions without mixing signedness.
  uint64_t const kNumBits = 150;
  vector<uint64_t> denseBits;
  vector<uint64_t> sparseBits;
  for (uint64_t i = 0; i < kNumBits; ++i)
  {
    denseBits.push_back(i);
    if (i % 15 == 0)
      sparseBits.push_back(i);
  }

  auto denseCBV = coding::CompressedBitVectorBuilder::FromBitPositions(denseBits);
  auto sparseCBV = coding::CompressedBitVectorBuilder::FromBitPositions(sparseBits);
  // Check the builder results before dereferencing them.
  TEST(denseCBV.get(), ());
  TEST(sparseCBV.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, denseCBV->GetStorageStrategy(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, sparseCBV->GetStorageStrategy(), ());

  set<uint64_t> denseSet;
  uint64_t maxPos = 0;
  coding::CompressedBitVectorEnumerator::ForEach(*denseCBV, [&](uint64_t pos)
  {
    denseSet.insert(pos);
    maxPos = max(maxPos, pos);
  });
  TEST_EQUAL(denseSet.size(), kNumBits, ());
  TEST_EQUAL(maxPos, kNumBits - 1, ());

  coding::CompressedBitVectorEnumerator::ForEach(*sparseCBV, [](uint64_t pos) { TEST_EQUAL(pos % 15, 0, ()); });
}
// Constructs a DenseCBV directly (bypassing the builder) with only bit 0 set and checks that
// enumeration reports position 0.
UNIT_TEST(CompressedBitVector_DenseOneBit)
{
  vector<uint64_t> setBits = {0};
  // make_unique instead of a raw new expression.
  auto cbv = make_unique<coding::DenseCBV>(setBits);
  TEST_EQUAL(cbv->PopCount(), 1, ());
  // The callback uses no local state, so it captures nothing.
  coding::CompressedBitVectorEnumerator::ForEach(*cbv, [](uint64_t pos) { TEST_EQUAL(pos, 0, ()); });
}
// LeaveFirstSetNBits on an empty vector must keep it empty, whether asked for 0 or 200 bits.
UNIT_TEST(CompressedBitVector_LeaveFirstNBitsSmoke)
{
  auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(vector<uint64_t>{});
  TEST_EQUAL(cbv->PopCount(), 0, ());

  for (auto const numBits : {0, 200})
  {
    cbv = cbv->LeaveFirstSetNBits(numBits);
    TEST_EQUAL(cbv->PopCount(), 0, ());
  }
}
// Checks LeaveFirstSetNBits on dense vectors: keeping zero bits empties the vector, and keeping
// the first 50 of 100 set bits preserves exactly the lowest 50 positions.
UNIT_TEST(CompressedBitVector_DenseLeaveFirstNBits)
{
  {
    // Four fully set blocks. The positions must be distinct: assign(n, 1) would produce n copies
    // of position 1 instead of n different positions.
    vector<uint64_t> setBits;
    for (uint64_t i = 0; i < coding::DenseCBV::kBlockSize * 4; ++i)
      setBits.push_back(i);

    auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits);
    TEST_EQUAL(cbv->PopCount(), coding::DenseCBV::kBlockSize * 4, ());
    TEST_EQUAL(cbv->GetStorageStrategy(), coding::CompressedBitVector::StorageStrategy::Dense, ());

    cbv = cbv->LeaveFirstSetNBits(0);
    TEST_EQUAL(cbv->PopCount(), 0, ());
  }

  {
    // Every even position in [0, 200).
    vector<uint64_t> setBits;
    for (uint64_t i = 0; i < 100; ++i)
      setBits.push_back(2 * i);

    auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits);
    TEST_EQUAL(cbv->PopCount(), 100, ());
    TEST_EQUAL(cbv->GetStorageStrategy(), coding::CompressedBitVector::StorageStrategy::Dense, ());

    cbv = cbv->LeaveFirstSetNBits(50);
    TEST_EQUAL(cbv->PopCount(), 50, ());
    TEST_EQUAL(cbv->GetStorageStrategy(), coding::CompressedBitVector::StorageStrategy::Dense, ());

    for (uint64_t i = 0; i < 50; ++i)
    {
      TEST(cbv->GetBit(2 * i), ());
      TEST(!cbv->GetBit(2 * i + 1), ());
    }
  }
}
// Checks LeaveFirstSetNBits on a sparse vector whose set bits are the powers of two 2^0 .. 2^9:
// asking for more bits than are set changes nothing, asking for 8 keeps 2^0 .. 2^7, and asking
// for 0 clears the vector.
UNIT_TEST(CompressedBitVector_SparseLeaveFirstNBits)
{
  vector<uint64_t> setBits;
  for (int p = 0; p < 10; ++p)
    setBits.push_back(static_cast<uint64_t>(1) << p);

  auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits);
  TEST_EQUAL(cbv->PopCount(), 10, ());
  TEST_EQUAL(cbv->GetStorageStrategy(), coding::CompressedBitVector::StorageStrategy::Sparse, ());

  cbv = cbv->LeaveFirstSetNBits(100);
  TEST_EQUAL(cbv->PopCount(), 10, ());
  for (uint64_t bit = 0; bit < (1 << 10); ++bit)
  {
    bool const isPowerOfTwo = bit != 0 && (bit & (bit - 1)) == 0;
    if (isPowerOfTwo)
    {
      TEST(cbv->GetBit(bit), (bit));
    }
    else
    {
      TEST(!cbv->GetBit(bit), (bit));
    }
  }

  cbv = cbv->LeaveFirstSetNBits(8);
  TEST_EQUAL(cbv->PopCount(), 8, ());
  for (uint64_t bit = 0; bit < (1 << 10); ++bit)
  {
    bool const isKeptPowerOfTwo = bit != 0 && (bit & (bit - 1)) == 0 && bit < (1 << 8);
    if (isKeptPowerOfTwo)
    {
      TEST(cbv->GetBit(bit), (bit));
    }
    else
    {
      TEST(!cbv->GetBit(bit), (bit));
    }
  }

  cbv = cbv->LeaveFirstSetNBits(0);
  TEST_EQUAL(cbv->PopCount(), 0, ());
  for (uint64_t bit = 0; bit < (1 << 10); ++bit)
    TEST(!cbv->GetBit(bit), (bit));
}

View file

@ -0,0 +1,224 @@
#include "testing/testing.hpp"
#include "coding/csv_reader.hpp"
#include "coding/file_reader.hpp"
#include "platform/platform_tests_support/scoped_file.hpp"
#include <string>
#include <vector>
namespace csv_reader_test
{
using platform::tests_support::ScopedFile;
using Row = coding::CSVReader::Row;
using Rows = coding::CSVReader::Rows;
namespace
{
// Two rows; the second one has three columns and a space inside its last field.
std::string const kCSV1{"a,b,c,d\ne,f,g h"};
// A single line containing both commas and spaces.
std::string const kCSV2{"a,b,cd a b, c"};
// Empty file.
std::string const kCSV3{""};
// Three two-column rows without a trailing newline.
std::string const kCSV4{"1,2\n3,4\n5,6"};
// Like kCSV4, but with a blank line before the last row and a trailing newline.
std::string const kCSV5{"1,2\n3,4\n\n5,6\n"};
}  // namespace
// Reads kCSV1 twice: once treating every line as data, and once treating the first line as a
// header that is excluded from the returned rows.
UNIT_TEST(CSVReaderSmoke)
{
  ScopedFile sf("test.csv", kCSV1);

  // Without a header both lines are returned as rows.
  {
    Row const firstRow = {"a", "b", "c", "d"};
    Row const secondRow = {"e", "f", "g h"};

    FileReader fileReader(sf.GetFullPath());
    coding::CSVReader reader(fileReader, false /* hasHeader */);
    auto const file = reader.ReadAll();
    TEST_EQUAL(file.size(), 2, ());
    TEST_EQUAL(file[0], firstRow, ());
    TEST_EQUAL(file[1], secondRow, ());
  }

  // With a header only the second line is a row; the first is available through GetHeader().
  {
    Row const headerRow = {"a", "b", "c", "d"};

    FileReader fileReader(sf.GetFullPath());
    coding::CSVReader reader(fileReader, true /* hasHeader */);
    auto const file = reader.ReadAll();
    TEST_EQUAL(file.size(), 1, ());
    TEST_EQUAL(reader.GetHeader(), headerRow, ());
  }
}
// Reads kCSV4 row by row with ReadRow() and checks that it keeps returning an empty result
// once the input is exhausted.
UNIT_TEST(CSVReaderReadLine)
{
  Rows const answer = {{"1", "2"}, {"3", "4"}, {"5", "6"}};

  ScopedFile sf("test.csv", kCSV4);
  coding::CSVReader reader(sf.GetFullPath());

  size_t index = 0;
  for (auto optionalRow = reader.ReadRow(); optionalRow; optionalRow = reader.ReadRow())
  {
    TEST_EQUAL(*optionalRow, answer[index], ());
    ++index;
  }
  TEST_EQUAL(index, answer.size(), ());

  TEST(!reader.ReadRow(), ());
  TEST(!reader.ReadRow(), ());
}
// Parses kCSV2 with a space as the delimiter: the commas are expected to stay inside the fields.
UNIT_TEST(CSVReaderCustomDelimiter)
{
  Row const firstRow = {"a,b,cd", "a", "b,", "c"};

  ScopedFile sf("test.csv", kCSV2);
  FileReader fileReader(sf.GetFullPath());
  coding::CSVReader reader(fileReader, false /* hasHeader */, ' ');

  auto const file = reader.ReadAll();
  TEST_EQUAL(file.size(), 1, ());
  TEST_EQUAL(file[0], firstRow, ());
}
// An empty file (kCSV3) must produce no rows.
UNIT_TEST(CSVReaderEmptyFile)
{
  ScopedFile sf("test.csv", kCSV3);
  FileReader fileReader(sf.GetFullPath());
  coding::CSVReader reader(fileReader);

  auto const file = reader.ReadAll();
  TEST_EQUAL(file.size(), 0, ());
}
// Parses kCSV4 through each of the three CSVReader construction paths used in this file (a
// FileReader, a file path and a std::istream) and expects identical rows from all of them.
UNIT_TEST(CSVReaderDifferentReaders)
{
  Rows const answer = {{"1", "2"}, {"3", "4"}, {"5", "6"}};
  ScopedFile sf("test.csv", kCSV4);

  // Constructed from a FileReader.
  {
    FileReader fileReader(sf.GetFullPath());
    coding::CSVReader reader(fileReader);
    auto const file = reader.ReadAll();
    TEST_EQUAL(file, answer, ());
  }

  // Constructed from a file path.
  {
    coding::CSVReader reader(sf.GetFullPath());
    auto const file = reader.ReadAll();
    TEST_EQUAL(file, answer, ());
  }

  // Constructed from an input stream.
  {
    std::ifstream stream(sf.GetFullPath());
    coding::CSVReader reader(stream);
    auto const file = reader.ReadAll();
    TEST_EQUAL(file, answer, ());
  }
}
// Parses kCSV5 through each construction path. The blank line in the middle is expected to
// yield an empty row, while the trailing newline is expected to add no row.
UNIT_TEST(CSVReaderEmptyLines)
{
  Rows const answer = {{"1", "2"}, {"3", "4"}, {}, {"5", "6"}};
  ScopedFile sf("test.csv", kCSV5);

  // Constructed from a FileReader.
  {
    FileReader fileReader(sf.GetFullPath());
    coding::CSVReader reader(fileReader);
    auto const file = reader.ReadAll();
    TEST_EQUAL(file, answer, ());
  }

  // Constructed from a file path.
  {
    coding::CSVReader reader(sf.GetFullPath());
    auto const file = reader.ReadAll();
    TEST_EQUAL(file, answer, ());
  }

  // Constructed from an input stream.
  {
    std::ifstream stream(sf.GetFullPath());
    coding::CSVReader reader(stream);
    auto const file = reader.ReadAll();
    TEST_EQUAL(file, answer, ());
  }
}
// Visits the rows of kCSV4 with ForEachRow() and checks their order and number.
UNIT_TEST(CSVReaderForEachRow)
{
  Rows const answer = {{"1", "2"}, {"3", "4"}, {"5", "6"}};

  ScopedFile sf("test.csv", kCSV4);
  FileReader fileReader(sf.GetFullPath());
  auto reader = coding::CSVReader(fileReader);

  size_t index = 0;
  auto const checkRow = [&](auto const & row)
  {
    TEST_EQUAL(row, answer[index], ());
    ++index;
  };
  reader.ForEachRow(checkRow);

  TEST_EQUAL(answer.size(), index, ());
}
// Exercises the iterator interface of coding::CSVRunner over kCSV4: first by stepping an
// iterator manually, then through a range-based for loop.
UNIT_TEST(CSVReaderIterator)
{
auto const fileName = "test.csv";
ScopedFile sf(fileName, kCSV4);
Rows const answer = {{"1", "2"}, {"3", "4"}, {"5", "6"}};
{
FileReader fileReader(sf.GetFullPath());
// The extra parentheses keep this from being parsed as a function declaration.
coding::CSVRunner runner((coding::CSVReader(fileReader)));
auto it = runner.begin();
TEST_EQUAL(*it, answer[0], ());
++it;
TEST_EQUAL(*it, answer[1], ());
// Post-increment: |it2| is expected to still yield the second row while |it| yields the third,
// yet the two iterators are expected to compare equal.
// NOTE(review): presumably equality does not depend on the current row - confirm against
// the CSVRunner iterator implementation.
auto it2 = it++;
TEST(it2 == it, ());
TEST_EQUAL(*it2, answer[1], ());
TEST_EQUAL(*it, answer[2], ());
// Advancing past the last row must reach end().
++it;
TEST(it == runner.end(), ());
}
{
// The same rows must be produced when the runner is used in a range-based for loop.
size_t index = 0;
for (auto const & row : coding::CSVRunner(coding::CSVReader(sf.GetFullPath())))
{
TEST_EQUAL(row, answer[index], ());
++index;
}
TEST_EQUAL(index, answer.size(), ());
}
}
// Empty fields (leading, consecutive and trailing) must be returned as empty strings.
UNIT_TEST(CSVReaderEmptyColumns)
{
  auto const kContentWithEmptyColumns = ",,2,,4,\n,,,,,";
  Rows const answer = {{"", "", "2", "", "4", ""}, {"", "", "", "", "", ""}};

  ScopedFile sf("test.csv", kContentWithEmptyColumns);
  coding::CSVReader reader(sf.GetFullPath());

  size_t index = 0;
  for (auto optionalRow = reader.ReadRow(); optionalRow; optionalRow = reader.ReadRow())
  {
    TEST_EQUAL(*optionalRow, answer[index], ());
    ++index;
  }
  TEST_EQUAL(index, answer.size(), ());

  // An exhausted reader keeps returning an empty result.
  TEST(!reader.ReadRow(), ());
  TEST(!reader.ReadRow(), ());
}
// Quoted fields: the expected row shows that quotes are stripped, that a doubled quote inside a
// quoted field becomes a single literal quote, and that delimiters inside quotes stay in the field.
UNIT_TEST(CSVReaderQuotes)
{
  auto const kContentWithQuotes =
      R"(noquotes, "" , "with space","with, comma","""double"" quotes","""double,"", commas", """""",)";
  Rows const answer = {
      {"noquotes", "", "with space", "with, comma", "\"double\" quotes", "\"double,\", commas", "\"\"", ""}};

  ScopedFile sf("test.csv", kContentWithQuotes);
  coding::CSVReader reader(sf.GetFullPath());

  size_t index = 0;
  for (auto optionalRow = reader.ReadRow(); optionalRow; optionalRow = reader.ReadRow())
  {
    TEST_EQUAL(*optionalRow, answer[index], ());
    ++index;
  }
  TEST_EQUAL(index, answer.size(), ());

  // An exhausted reader keeps returning an empty result.
  TEST(!reader.ReadRow(), ());
  TEST(!reader.ReadRow(), ());
}
} // namespace csv_reader_test

View file

@ -0,0 +1,45 @@
#include "testing/testing.hpp"
#include "coding/dd_vector.hpp"
#include "coding/reader.hpp"
#include <cstdint>
#include <vector>
// Wraps three uint16_t values in a MemReader and checks that DDVector exposes them through
// size(), operator[] and iteration.
UNIT_TEST(DDVector_Smoke)
{
  // The buffer holds the raw elements only, in the byte order of the machine running the test:
  // three values are stored, and the vector is expected to report exactly three elements.
  std::vector<uint16_t> data;
  data.push_back(1);
  data.push_back(2);
  data.push_back(3);

  using Vector = DDVector<uint16_t, MemReader>;
  MemReader reader(reinterpret_cast<char const *>(data.data()), data.size() * sizeof(data[0]));
  Vector v(reader);

  TEST_EQUAL(3, v.size(), ());
  TEST_EQUAL(1, v[0], ());
  TEST_EQUAL(2, v[1], ());
  TEST_EQUAL(3, v[2], ());

  // Range-based iteration must visit the same elements as explicit iterator stepping.
  Vector::const_iterator it = v.begin();
  for (auto const value : v)
    TEST_EQUAL(value, *it++, ());
}
// Constructing a DDVector over a reader of unsuitable size must throw OpenException.
UNIT_TEST(DDVector_IncorrectSize)
{
  using Vector = DDVector<uint16_t, MemReader>;

  // ARRAY_SIZE(data) presumably yields 3 here (two characters plus the terminating NUL), which
  // is not a multiple of sizeof(uint16_t) - TODO confirm against the macro definition.
  char const data[] = "ab";
  MemReader reader(data, ARRAY_SIZE(data));

  bool exceptionCaught = false;
  try
  {
    Vector v(reader);
  }
  catch (Vector::OpenException const &)
  {
    // Only the exception type matters; the object itself is not inspected.
    exceptionCaught = true;
  }
  TEST(exceptionCaught, ());
}

View file

@ -0,0 +1,278 @@
#include "testing/testing.hpp"
#include "coding/byte_stream.hpp"
#include "coding/dd_vector.hpp"
#include "coding/diff.hpp"
#include "coding/reader.hpp"
#include "base/rolling_hash.hpp"
#include <cstddef>
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
// Checks the value returned by diff::DiffMyersSimple for two pairs of strings under different
// values of its third argument.
// NOTE(review): the third argument looks like an upper bound on the diff size, with -1
// returned when the bound is too small - confirm against coding/diff.hpp.
UNIT_TEST(MyersSimpleDiff)
{
vector<char> tmp;
// All calls below write into the same sink; its contents are never inspected.
PushBackByteSink<vector<char>> sink(tmp);
TEST_EQUAL(4, diff::DiffMyersSimple(string("axxxb"), string("cxxxd"), 5, sink), ());
// The same pair of strings yields 5 for the third argument 10 or 5, and -1 for 4, 2 or 1.
TEST_EQUAL(5, diff::DiffMyersSimple(string("abcabba"), string("cbabac"), 10, sink), ());
TEST_EQUAL(5, diff::DiffMyersSimple(string("abcabba"), string("cbabac"), 5, sink), ());
TEST_EQUAL(-1, diff::DiffMyersSimple(string("abcabba"), string("cbabac"), 4, sink), ());
TEST_EQUAL(-1, diff::DiffMyersSimple(string("abcabba"), string("cbabac"), 2, sink), ());
TEST_EQUAL(-1, diff::DiffMyersSimple(string("abcabba"), string("cbabac"), 1, sink), ());
}
// Patch writer stub for tests: renders everything it receives into a string so that the
// output of diff::PatchCoder can be compared with a literal.
class TestPatchWriter
{
public:
  // Appends |n| consecutive elements, starting at |it|, to the output.
  template <typename IterT>
  void WriteData(IterT it, uint64_t n)
  {
    for (; n != 0; --n)
    {
      m_Stream << *it;
      ++it;
    }
  }

  // Appends |op| as a decimal number followed by the "." separator.
  void WriteOperation(uint64_t op)
  {
    m_Stream << op;
    m_Stream << ".";
  }

  // Returns everything written so far.
  string Str() { return m_Stream.str(); }

private:
  ostringstream m_Stream;
};
// Patch that starts with copy operations. The expected output contains a single operation code
// for the two consecutive Copy() calls, followed by the inserted data and its operation code.
UNIT_TEST(PatchCoderCopyFirst)
{
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);

  coder.Copy(2);
  coder.Copy(1);
  coder.Insert("ab", 2);
  coder.Finalize();

  TEST_EQUAL(writer.Str(), "6.ab5.", ());
}
// Patch that starts with an insertion. In the expected output the data of the two consecutive
// Insert() calls ("d" and "e") appears as one run followed by one operation code.
UNIT_TEST(PatchCoderInsertFirst)
{
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);

  coder.Insert("abc", 3);
  coder.Copy(3);
  coder.Insert("d", 1);
  coder.Insert("e", 1);
  coder.Delete(5);
  coder.Finalize();

  TEST_EQUAL(writer.Str(), "abc7.6.de5.11.", ());
}
// Patch that starts with a deletion followed by a copy.
UNIT_TEST(PatchCoderDeleteFirst)
{
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);

  coder.Delete(3);
  coder.Copy(2);
  coder.Finalize();

  TEST_EQUAL(writer.Str(), "6.5.", ());
}
// Finalizing a coder that received no operations must write nothing.
UNIT_TEST(PatchCoderEmptyPatch)
{
  TestPatchWriter writer;
  diff::PatchCoder<TestPatchWriter> coder(writer);

  coder.Finalize();

  TEST_EQUAL(writer.Str(), "", ());
}
// PatchCoder mock.
// Renders operations in a simple textual format: "=x." for copy, "-x." for delete and "+str."
// for insert, where x is a number, "." terminates every operation and str is the inserted data.
// Operations with n == 0 are ignored. Equal consecutive operations are not merged, i.e.
// "=2.=2." is not turned into "=4.".
class TestPatchCoder
{
public:
  using size_type = size_t;

  void Copy(size_t n) { EmitCounted("=", n); }

  void Delete(size_t n) { EmitCounted("-", n); }

  // Writes "+", then |n| consecutive elements starting at |it|, then ".".
  template <typename IterT>
  void Insert(IterT it, size_t n)
  {
    if (n != 0)
    {
      m_Stream << "+";
      for (size_t left = n; left != 0; --left)
      {
        m_Stream << *it;
        ++it;
      }
      m_Stream << ".";
    }
  }

  void Finalize() {}

  // Returns the textual diff accumulated so far.
  string Str() { return m_Stream.str(); }

private:
  // Writes |tag| followed by |n| and "."; does nothing when |n| is zero.
  void EmitCounted(char const * tag, size_t n)
  {
    if (n == 0)
      return;
    m_Stream << tag << n << ".";
  }

  ostringstream m_Stream;
};
// SimpleReplaceDiffer on strings that share a prefix and a suffix: the differing middle is
// expected to be encoded as one delete plus one insert, surrounded by two copies.
UNIT_TEST(DiffSimpleReplace)
{
  using CharVector = DDVector<char, MemReader>;  // Elements are single bytes: sizeof(char) == 1.

  char const src[] = "abcxxxdef";
  char const dst[] = "abcyydef";
  // The "- 1" presumably keeps the terminating NUL of each literal out of the reader.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);
  diff::SimpleReplaceDiffer differ;

  // Textual rendering of the diff.
  TestPatchCoder testPatchCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), testPatchCoder);
  TEST_EQUAL(testPatchCoder.Str(), "=3.-3.+yy.=3.", ());

  // The same diff encoded by diff::PatchCoder.
  TestPatchWriter patchWriter;
  diff::PatchCoder<TestPatchWriter> patchCoder(patchWriter);
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), patchCoder);
  patchCoder.Finalize();
  TEST_EQUAL(patchWriter.Str(), "6.6.yy4.6.", ());
}
// SimpleReplaceDiffer on strings with no common prefix: the diff is expected to start with the
// delete/insert pair and end with a copy of the common suffix.
UNIT_TEST(DiffSimpleReplaceEmptyBegin)
{
  using CharVector = DDVector<char, MemReader>;  // Elements are single bytes: sizeof(char) == 1.

  char const src[] = "xxxdef";
  char const dst[] = "yydef";
  // The "- 1" presumably keeps the terminating NUL of each literal out of the reader.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);
  diff::SimpleReplaceDiffer differ;

  // Textual rendering of the diff.
  TestPatchCoder testPatchCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), testPatchCoder);
  TEST_EQUAL(testPatchCoder.Str(), "-3.+yy.=3.", ());

  // The same diff encoded by diff::PatchCoder.
  TestPatchWriter patchWriter;
  diff::PatchCoder<TestPatchWriter> patchCoder(patchWriter);
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), patchCoder);
  patchCoder.Finalize();
  TEST_EQUAL(patchWriter.Str(), "6.yy4.6.", ());
}
// SimpleReplaceDiffer on strings with no common suffix: the diff is expected to start with a
// copy of the common prefix and end with the delete/insert pair.
UNIT_TEST(DiffSimpleReplaceEmptyEnd)
{
  using CharVector = DDVector<char, MemReader>;  // Elements are single bytes: sizeof(char) == 1.

  char const src[] = "abcxxx";
  char const dst[] = "abcyy";
  // The "- 1" presumably keeps the terminating NUL of each literal out of the reader.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);
  diff::SimpleReplaceDiffer differ;

  // Textual rendering of the diff.
  TestPatchCoder testPatchCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), testPatchCoder);
  TEST_EQUAL(testPatchCoder.Str(), "=3.-3.+yy.", ());

  // The same diff encoded by diff::PatchCoder.
  TestPatchWriter patchWriter;
  diff::PatchCoder<TestPatchWriter> patchCoder(patchWriter);
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), patchCoder);
  patchCoder.Finalize();
  TEST_EQUAL(patchWriter.Str(), "6.6.yy4.", ());
}
// SimpleReplaceDiffer on identical strings: the diff is expected to be a single copy.
UNIT_TEST(DiffSimpleReplaceAllEqual)
{
  using CharVector = DDVector<char, MemReader>;  // Elements are single bytes: sizeof(char) == 1.

  char const src[] = "abcdef";
  char const dst[] = "abcdef";
  // The "- 1" presumably keeps the terminating NUL of each literal out of the reader.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);
  diff::SimpleReplaceDiffer differ;

  // Textual rendering of the diff.
  TestPatchCoder testPatchCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), testPatchCoder);
  TEST_EQUAL(testPatchCoder.Str(), "=6.", ());

  // The same diff encoded by diff::PatchCoder.
  TestPatchWriter patchWriter;
  diff::PatchCoder<TestPatchWriter> patchCoder(patchWriter);
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), patchCoder);
  patchCoder.Finalize();
  TEST_EQUAL(patchWriter.Str(), "12.", ());
}
// RollingHashDiffer constructed with the argument 3, run on identical strings: the expected
// diff consists of copy operations only.
UNIT_TEST(DiffWithRollingHashEqualStrings)
{
  using CharVector = DDVector<char, MemReader>;  // Elements are single bytes: sizeof(char) == 1.
  using Differ = diff::RollingHashDiffer<diff::SimpleReplaceDiffer, RollingHasher64>;

  char const src[] = "abcdefklmno";
  char const dst[] = "abcdefklmno";
  // The "- 1" presumably keeps the terminating NUL of each literal out of the reader.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  Differ differ(3);
  TestPatchCoder testPatchCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), testPatchCoder);
  TEST_EQUAL(testPatchCoder.Str(), "=3.=3.=3.=2.", ());
}
// RollingHashDiffer on strings without any common character: the expected diff deletes the
// whole source and inserts the whole destination.
UNIT_TEST(DiffWithRollingHashCompletelyDifferentStrings)
{
  using CharVector = DDVector<char, MemReader>;  // Elements are single bytes: sizeof(char) == 1.
  using Differ = diff::RollingHashDiffer<diff::SimpleReplaceDiffer, RollingHasher64>;

  char const src[] = "pqrstuvw";
  char const dst[] = "abcdefgh";
  // The "- 1" presumably keeps the terminating NUL of each literal out of the reader.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  Differ differ(3);
  TestPatchCoder testPatchCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), testPatchCoder);
  TEST_EQUAL(testPatchCoder.Str(), "-8.+abcdefgh.", ());
}
// RollingHashDiffer on a destination with two local edits ("ef" -> "fe" and "j" -> "kk"): the
// expected diff mixes copies with two delete/insert pairs.
UNIT_TEST(DiffWithRollingHash1)
{
  using CharVector = DDVector<char, MemReader>;  // Elements are single bytes: sizeof(char) == 1.
  using Differ = diff::RollingHashDiffer<diff::SimpleReplaceDiffer, RollingHasher64>;

  char const src[] = "abcdefghijklmnop";
  char const dst[] = "abcdfeghikkklmnop";
  // The "- 1" presumably keeps the terminating NUL of each literal out of the reader.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  Differ differ(3);
  TestPatchCoder testPatchCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), testPatchCoder);
  TEST_EQUAL(testPatchCoder.Str(), "=3.=1.-2.+fe.=3.-1.+kk.=2.=3.=1.", ());
}
// RollingHashDiffer on a destination with one replaced character ("c" -> "x") and one removed
// run ("ghijk"): the expected diff contains a delete/insert pair and a pure delete.
UNIT_TEST(DiffWithRollingHash2)
{
  using CharVector = DDVector<char, MemReader>;  // Elements are single bytes: sizeof(char) == 1.
  using Differ = diff::RollingHashDiffer<diff::SimpleReplaceDiffer, RollingHasher64>;

  char const src[] = "abcdefghijklmnop";
  char const dst[] = "abxdeflmnop";
  // The "- 1" presumably keeps the terminating NUL of each literal out of the reader.
  MemReader srcReader(src, ARRAY_SIZE(src) - 1);
  MemReader dstReader(dst, ARRAY_SIZE(dst) - 1);
  CharVector srcV(srcReader);
  CharVector dstV(dstReader);

  Differ differ(3);
  TestPatchCoder testPatchCoder;
  differ.Diff(srcV.begin(), srcV.end(), dstV.begin(), dstV.end(), testPatchCoder);
  TEST_EQUAL(testPatchCoder.Str(), "=2.-1.+x.=3.-5.=1.=3.=1.", ());
}

View file

@ -0,0 +1,62 @@
#include "testing/testing.hpp"
#include "coding/bit_streams.hpp"
#include "coding/elias_coder.hpp"
#include "coding/reader.hpp"
#include "coding/writer.hpp"
#include "base/bits.hpp"
#include <cstdint>
#include <string>
#include <vector>
namespace
{
template <typename TCoder>
void TestCoder(std::string const & name)
{
using TBuffer = std::vector<uint8_t>;
using TWriter = MemWriter<TBuffer>;
uint64_t const kMask = 0xfedcba9876543210;
TBuffer buf;
{
TWriter w(buf);
BitWriter<TWriter> bits(w);
for (int i = 0; i <= 64; ++i)
{
uint64_t const mask = bits::GetFullMask(i);
uint64_t const value = kMask & mask;
if (value == 0)
TEST(!TCoder::Encode(bits, value), (name, i));
else
TEST(TCoder::Encode(bits, value), (name, i));
}
}
{
MemReader r(buf.data(), buf.size());
ReaderSource<MemReader> src(r);
BitReader<ReaderSource<MemReader>> bits(src);
for (int i = 0; i <= 64; ++i)
{
uint64_t const mask = bits::GetFullMask(i);
uint64_t const expected = kMask & mask;
if (expected == 0)
continue;
TEST_EQUAL(expected, TCoder::Decode(bits), (name, i));
}
}
}
// Runs the shared encode/decode round trip for coding::GammaCoder.
UNIT_TEST(EliasCoder_Gamma)
{
  std::string const coderName = "Gamma";
  TestCoder<coding::GammaCoder>(coderName);
}
// Runs the shared encode/decode round trip for coding::DeltaCoder.
UNIT_TEST(EliasCoder_Delta)
{
  std::string const coderName = "Delta";
  TestCoder<coding::DeltaCoder>(coderName);
}
} // namespace

View file

@ -0,0 +1,40 @@
#include "testing/testing.hpp"
#include "coding/endianness.hpp"
// Reversing the byte order of a single-byte value must return the value unchanged.
// Each case is checked once; the verbatim repetition of the three assertions was removed.
UNIT_TEST(Endianness1Byte)
{
  TEST_EQUAL(uint8_t(0), ReverseByteOrder<uint8_t>(0), ());
  TEST_EQUAL(uint8_t(17), ReverseByteOrder<uint8_t>(17), ());
  TEST_EQUAL(uint8_t(255), ReverseByteOrder<uint8_t>(255), ());
}
// Reversing the byte order of a 16-bit value must swap its two bytes.
// Each case is checked once; the verbatim repetition of the four assertions was removed.
UNIT_TEST(Endianness12Bytes)
{
  TEST_EQUAL(uint16_t(0), ReverseByteOrder<uint16_t>(0), ());
  TEST_EQUAL(uint16_t(256), ReverseByteOrder<uint16_t>(1), ());
  TEST_EQUAL(uint16_t(0xE8FD), ReverseByteOrder<uint16_t>(0xFDE8), ());
  TEST_EQUAL(uint16_t(0xFFFF), ReverseByteOrder<uint16_t>(0xFFFF), ());
}
// Reversing the byte order of a 64-bit value must mirror all eight bytes.
// Each case is checked once; the verbatim repetition of the four assertions was removed.
UNIT_TEST(Endianness18Bytes)
{
  TEST_EQUAL(0ULL, ReverseByteOrder(0ULL), ());
  TEST_EQUAL(1ULL, ReverseByteOrder(1ULL << 56), ());
  TEST_EQUAL(0xE2E4D7D5B1C3B8C6ULL, ReverseByteOrder(0xC6B8C3B1D5D7E4E2ULL), ());
  TEST_EQUAL(0xFFFFFFFFFFFFFFFFULL, ReverseByteOrder(0xFFFFFFFFFFFFFFFFULL), ());
}

View file

@ -0,0 +1,278 @@
#include "testing/testing.hpp"
#include "coding/internal/file_data.hpp"
#include <cstring> // strlen
#include <fstream>
#include <string>
namespace file_data_test
{
std::string const name1 = "test1.file";
std::string const name2 = "test2.file";
void MakeFile(std::string const & name)
{
base::FileData f(name, base::FileData::Op::WRITE_TRUNCATE);
f.Write(name.c_str(), name.size());
}
void MakeFile(std::string const & name, size_t const size, char const c)
{
base::FileData f(name, base::FileData::Op::WRITE_TRUNCATE);
f.Write(std::string(size, c).c_str(), size);
}
#ifdef OMIM_OS_WINDOWS
// Verifies that the file |name| has the content written by MakeFile(name), i.e. the
// characters of |name| itself.
void CheckFileOK(std::string const & name)
{
  base::FileData f(name, base::FileData::Op::READ);
  uint64_t const size = f.Size();
  TEST_EQUAL(size, name.size(), ());

  std::vector<char> buffer(static_cast<size_t>(size));
  // data() is well-defined for an empty vector, unlike &buffer[0].
  f.Read(0, buffer.data(), size);
  // Qualified call: do not depend on ADL finding std::equal through the iterator types.
  TEST(std::equal(name.begin(), name.end(), buffer.begin()), ());
}
#endif
// Smoke test for the free-function file API: size query, rename and delete.
UNIT_TEST(FileData_ApiSmoke)
{
  MakeFile(name1);
  uint64_t const expectedSize = name1.size();

  uint64_t actualSize;
  TEST(base::GetFileSize(name1, actualSize), ());
  TEST_EQUAL(actualSize, expectedSize, ());

  // After the rename only the new name can be queried, and the size is unchanged.
  TEST(base::RenameFileX(name1, name2), ());
  TEST(!base::GetFileSize(name1, actualSize), ());
  TEST(base::GetFileSize(name2, actualSize), ());
  TEST_EQUAL(actualSize, expectedSize, ());

  // After the delete the size query fails again.
  TEST(base::DeleteFileX(name2), ());
  TEST(!base::GetFileSize(name2, actualSize), ());
}
/*
UNIT_TEST(FileData_NoDiskSpace)
{
char const * name = "/Volumes/KINDLE/file.bin";
vector<uint8_t> bytes(100000000);
try
{
base::FileData f(name, base::FileData::Op::WRITE_TRUNCATE);
for (size_t i = 0; i < 100; ++i)
f.Write(&bytes[0], bytes.size());
}
catch (Writer::Exception const & ex)
{
LOG(LINFO, ("Writer exception catched"));
}
(void)base::DeleteFileX(name);
}
*/
/*
#ifdef OMIM_OS_WINDOWS
UNIT_TEST(FileData_SharingAV_Windows)
{
{
MakeFile(name1);
// lock file, will check sharing access
base::FileData f1(name1, base::FileData::Op::READ);
// try rename or delete locked file
TEST(!base::RenameFileX(name1, name2), ());
TEST(!base::DeleteFileX(name1), ());
MakeFile(name2);
// try rename or copy to locked file
TEST(!base::RenameFileX(name2, name1), ());
TEST(!base::CopyFileX(name2, name1), ());
// files should be unchanged
CheckFileOK(name1);
CheckFileOK(name2);
//TEST(base::CopyFile(name1, name2), ());
}
// renaming to existing file is not allowed
TEST(!base::RenameFileX(name1, name2), ());
TEST(!base::RenameFileX(name2, name1), ());
TEST(base::DeleteFileX(name1), ());
TEST(base::DeleteFileX(name2), ());
}
#endif
*/
// IsEqualFiles on two small files with different content.
UNIT_TEST(Equal_Function_Test)
{
  // MakeFile(name) stores the file's own name, so the two files differ.
  MakeFile(name1);
  MakeFile(name2);

  // A file is equal to itself ...
  TEST(base::IsEqualFiles(name1, name1), ());
  TEST(base::IsEqualFiles(name2, name2), ());
  // ... but not to the other one.
  TEST(!base::IsEqualFiles(name1, name2), ());

  TEST(base::DeleteFileX(name1), ());
  TEST(base::DeleteFileX(name2), ());
}
// IsEqualFiles on files of about 1 MiB: equal content, different content, different size.
UNIT_TEST(Equal_Function_Test_For_Big_Files)
{
  size_t const kMiB = 1024 * 1024;

  // Same size (exactly 1 MiB), same content.
  {
    MakeFile(name1, kMiB, 'a');
    MakeFile(name2, kMiB, 'a');
    TEST(base::IsEqualFiles(name1, name2), ());
    TEST(base::DeleteFileX(name1), ());
    TEST(base::DeleteFileX(name2), ());
  }

  // Same size (1 MiB + 512 bytes), same content.
  {
    MakeFile(name1, kMiB + 512, 'a');
    MakeFile(name2, kMiB + 512, 'a');
    TEST(base::IsEqualFiles(name1, name2), ());
    TEST(base::DeleteFileX(name1), ());
    TEST(base::DeleteFileX(name2), ());
  }

  // Same size (1 MiB + 1 byte), different content.
  {
    MakeFile(name1, kMiB + 1, 'a');
    MakeFile(name2, kMiB + 1, 'b');
    TEST(base::IsEqualFiles(name1, name1), ());
    TEST(base::IsEqualFiles(name2, name2), ());
    TEST(!base::IsEqualFiles(name1, name2), ());
    TEST(base::DeleteFileX(name1), ());
    TEST(base::DeleteFileX(name2), ());
  }

  // Same size (exactly 1 MiB), different content.
  {
    MakeFile(name1, kMiB, 'a');
    MakeFile(name2, kMiB, 'b');
    TEST(base::IsEqualFiles(name1, name1), ());
    TEST(base::IsEqualFiles(name2, name2), ());
    TEST(!base::IsEqualFiles(name1, name2), ());
    TEST(base::DeleteFileX(name1), ());
    TEST(base::DeleteFileX(name2), ());
  }

  // Different size and different content.
  {
    MakeFile(name1, kMiB, 'a');
    MakeFile(name2, kMiB + 1, 'b');
    TEST(!base::IsEqualFiles(name1, name2), ());
    TEST(base::DeleteFileX(name1), ());
    TEST(base::DeleteFileX(name2), ());
  }
}
// Walks a zero-size file through creation, copy, delete and rename.
UNIT_TEST(EmptyFile)
{
  using namespace base;

  std::string const original = "test.empty";
  std::string const duplicate = "test.empty.copy";
  uint64_t size;

  // Neither file exists at the start.
  TEST(!GetFileSize(original, size), ());
  TEST(!GetFileSize(duplicate, size), ());

  // Copying a file that does not exist must fail ...
  TEST(!CopyFileX(original, duplicate), ());

  // ... and must leave both files absent.
  TEST(!GetFileSize(original, size), ());
  TEST(!GetFileSize(duplicate, size), ());

  {
    // Opening for write and closing right away creates a zero-size file.
    FileData emptyFile(original, base::FileData::Op::WRITE_TRUNCATE);
  }

  // The empty file is present on disk.
  TEST(GetFileSize(original, size), ());
  TEST_EQUAL(size, 0, ());

  // Copy it.
  TEST(CopyFileX(original, duplicate), ());
  // TEST(!RenameFileX(original, duplicate), ());

  // Remove the copy, then rename the original onto the copy's name.
  TEST(DeleteFileX(duplicate), ());
  TEST(RenameFileX(original, duplicate), ());

  // Only the renamed file is left, still empty.
  TEST(!GetFileSize(original, size), ());
  TEST(GetFileSize(duplicate, size), ());
  TEST_EQUAL(size, 0, ());

  // Clean up.
  TEST(DeleteFileX(duplicate), ());
}
// Renaming onto an existing file must succeed and replace the target's content.
UNIT_TEST(RenameOnExistingFile)
{
  using namespace base;

  std::string const source = "test.empty";
  std::string const target = "test.empty.copy";

  // The source holds the single byte 1 ...
  {
    FileData file(source, FileData::Op::WRITE_TRUNCATE);
    uint8_t const marker = 1;
    file.Write(&marker, 1);
  }

  // ... and the target holds the single byte 2.
  {
    FileData file(target, FileData::Op::WRITE_TRUNCATE);
    uint8_t const marker = 2;
    file.Write(&marker, 1);
  }

  TEST(RenameFileX(source, target), ());

  // The target must now contain the source's byte.
  {
    FileData file(target, FileData::Op::READ);
    uint8_t marker;
    file.Read(0, &marker, 1);
    TEST_EQUAL(marker, 1, ());
  }

  TEST(DeleteFileX(target), ());
}
// An 'obvious' check of std::getline: a 'while (getline)' loop must also return the last line
// of a file that has no trailing '\n' (that line was once suspected of being dropped).
UNIT_TEST(File_StdGetLine)
{
  std::string const fName = "test.txt";

  // The same four lines, first without and then with a trailing newline.
  for (char const * content : {"x\nxy\nxyz\nxyzk", "x\nxy\nxyz\nxyzk\n"})
  {
    {
      base::FileData file(fName, base::FileData::Op::WRITE_TRUNCATE);
      file.Write(content, std::strlen(content));
    }

    {
      std::ifstream input(fName);
      std::string line;
      size_t linesRead = 0;
      // The k-th line (1-based) is exactly k characters long.
      while (std::getline(input, line))
      {
        ++linesRead;
        TEST_EQUAL(line.size(), linesRead, ());
      }
      TEST_EQUAL(linesRead, 4, ());
    }

    TEST(base::DeleteFileX(fName), ());
  }
}
} // namespace file_data_test

View file

@ -0,0 +1,57 @@
#include "testing/testing.hpp"
#include "coding/file_sort.hpp"
#include "coding/reader.hpp"
#include "coding/write_to_sink.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>
using namespace std;
namespace
{
// Feeds |data| to a FileSorter whose output goes to an in-memory buffer and checks that the
// buffer ends up holding the sorted values. |data| itself is sorted in place as a side effect.
void TestFileSorter(vector<uint32_t> & data, char const * tmpFileName, size_t bufferSize)
{
  using Buffer = vector<char>;
  using BufferWriter = MemWriter<Buffer>;
  using Sink = WriterFunctor<BufferWriter>;

  Buffer sortedBytes;
  BufferWriter bufferWriter(sortedBytes);
  Sink sink(bufferWriter);

  FileSorter<uint32_t, Sink> sorter(bufferSize, tmpFileName, sink);
  for (size_t idx = 0; idx != data.size(); ++idx)
    sorter.Add(data[idx]);
  sorter.SortAndFinish();

  // Every input value must have been emitted.
  TEST_EQUAL(sortedBytes.size(), data.size() * sizeof(data[0]), ());

  // Reference result.
  sort(data.begin(), data.end());

  MemReader reader(&sortedBytes[0], sortedBytes.size());
  TEST_EQUAL(reader.Size(), data.size() * sizeof(data[0]), ());

  vector<uint32_t> fromSorter(data.size());
  reader.Read(0, &fromSorter[0], reader.Size());
  TEST_EQUAL(fromSorter, data, ());
}
} // namespace
// Sorts three values with a small buffer size.
UNIT_TEST(FileSorter_Smoke)
{
  vector<uint32_t> data = {2, 3, 1};
  TestFileSorter(data, "file_sorter_test_smoke.tmp", 10);
}
// Sorts 1000 pseudo-random values (fixed seed) with a buffer ten times smaller than the input.
UNIT_TEST(FileSorter_Random)
{
  mt19937 rng(0);
  vector<uint32_t> data(1000);
  for (size_t i = 0; i < data.size(); ++i)
  {
    // Every 100th element repeats the value generated 20 positions earlier, so the input
    // contains duplicates. The parentheses are essential: `i + 1 % 100` parses as
    // `i + (1 % 100)`, which is never zero and made the duplicate branch unreachable.
    // The first repeat happens at i == 99, so `i - 20` cannot underflow.
    bool const repeatEarlier = (i + 1) % 100 == 0;
    data[i] = repeatEarlier ? data[i - 20] : rng();
  }
  TestFileSorter(data, "file_sorter_test_random.tmp", data.size() / 10);
}

View file

@ -0,0 +1,421 @@
#include "testing/testing.hpp"
#include "coding/files_container.hpp"
#include "coding/varint.hpp"
#include "base/logging.hpp"
#include "base/scope_guard.hpp"
#include "base/string_utils.hpp"
#include "std/target_os.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#ifndef OMIM_OS_WINDOWS
#include <unistd.h> // _SC_PAGESIZE
#endif
using namespace std;
// Writes ten sections, reads them back, then appends three more sections one at a time.
UNIT_TEST(FilesContainer_Smoke)
{
  string const fName = "files_container.tmp";
  FileWriter::DeleteFileX(fName);
  size_t const sectionCount = 10;

  // Write: the section tagged "i" holds the varints 0 .. i-1.
  {
    FilesContainerW container(fName);
    for (size_t i = 0; i < sectionCount; ++i)
    {
      auto sectionWriter = container.GetWriter(strings::to_string(i));
      for (uint32_t j = 0; j < i; ++j)
        WriteVarUint(sectionWriter, j);
    }
  }

  // Read every section back and compare.
  {
    FilesContainerR container(fName);
    for (size_t i = 0; i < sectionCount; ++i)
    {
      FilesContainerR::TReader sectionReader = container.GetReader(strings::to_string(i));
      ReaderSource<FilesContainerR::TReader> src(sectionReader);

      for (uint32_t j = 0; j < i; ++j)
      {
        uint32_t const stored = ReadVarUint<uint32_t>(src);
        TEST_EQUAL(j, stored, ());
      }
    }
  }

  // Append: each value becomes a new section tagged with its decimal representation.
  uint32_t const arrAppend[] = {888, 777, 666};
  for (uint32_t const value : arrAppend)
  {
    {
      FilesContainerW container(fName, FileWriter::OP_WRITE_EXISTING);
      auto sectionWriter = container.GetWriter(strings::to_string(value));
      WriteVarUint(sectionWriter, value);
    }

    // Read the freshly appended section.
    {
      FilesContainerR container(fName);
      FilesContainerR::TReader sectionReader = container.GetReader(strings::to_string(value));
      ReaderSource<FilesContainerR::TReader> src(sectionReader);

      uint32_t const stored = ReadVarUint<uint32_t>(src);
      TEST_EQUAL(value, stored, ());
    }
  }

  FileWriter::DeleteFileX(fName);
}
namespace
{
// Checks that the section |tag| of |reader| starts with the 64-bit value |test|.
void CheckInvariant(FilesContainerR & reader, string const & tag, int64_t test)
{
  FilesContainerR::TReader sectionReader = reader.GetReader(tag);
  auto const stored = ReadPrimitiveFromPos<int64_t>(sectionReader, 0);
  TEST_EQUAL(test, stored, ());
}
} // namespace
// Reads section "5" of a container while a writer opened on the same file adds section "3",
// and checks that section "2" still holds its 64-bit marker before and after that.
UNIT_TEST(FilesContainer_Shared)
{
string const fName = "files_container.tmp";
FileWriter::DeleteFileX(fName);
uint32_t const count = 10;
// Marker value stored in section "2" and re-checked via CheckInvariant.
int64_t const test64 = 908175281437210836LL;
{
// shared container fill
FilesContainerW writer(fName);
auto w1 = writer.GetWriter("5");
// Section "5": a uint32_t zero followed by the varints 0 .. count-1.
WriteToSink(w1, uint32_t(0));
for (uint32_t i = 0; i < count; ++i)
WriteVarUint(w1, i);
w1->Flush();
// Section "2": just the 64-bit marker.
auto w2 = writer.GetWriter("2");
WriteToSink(w2, test64);
w2->Flush();
}
{
// shared container read and fill
FilesContainerR reader(fName);
FilesContainerR::TReader r1 = reader.GetReader("5");
// Skip the leading uint32_t so that r1 starts at the first varint.
uint64_t const offset = sizeof(uint32_t);
r1 = r1.SubReader(offset, r1.Size() - offset);
CheckInvariant(reader, "2", test64);
// The writer is opened on the same file while |reader| and |r1| are still alive.
FilesContainerW writer(fName, FileWriter::OP_WRITE_EXISTING);
auto w = writer.GetWriter("3");
ReaderSource<FilesContainerR::TReader> src(r1);
for (uint32_t i = 0; i < count; ++i)
{
// Each value read from section "5" is verified and copied into section "3".
uint32_t test = ReadVarUint<uint32_t>(src);
TEST_EQUAL(test, i, ());
WriteVarUint(w, i);
}
}
{
// check invariant
FilesContainerR reader(fName);
CheckInvariant(reader, "2", test64);
}
FileWriter::DeleteFileX(fName);
}
namespace
{
// Opens the existing container |fName| for writing and writes the characters of |value|
// (without the terminator) through the writer obtained for the tag |key|.
void ReplaceInContainer(string const & fName, char const * key, char const * value)
{
  FilesContainerW container(fName, FileWriter::OP_WRITE_EXISTING);
  auto sectionWriter = container.GetWriter(key);
  size_t const length = strlen(value);
  sectionWriter->Write(value, length);
}
// Checks, for each i < |count|, that the section key[i] of the container |fName| starts with
// the characters of value[i].
void CheckContainer(string const & fName, char const * key[], char const * value[], size_t count)
{
  FilesContainerR reader(fName);
  LOG(LINFO, ("Size=", reader.GetFileSize()));

  for (size_t i = 0; i < count; ++i)
  {
    FilesContainerR::TReader r = reader.GetReader(key[i]);

    size_t const szBuffer = 100;
    size_t const szS = strlen(value[i]);

    // Zero-filled, so the bytes read below are always followed by a terminator.
    char s[szBuffer] = {0};

    // Use TEST rather than ASSERT_LESS: the bound protects the Read below from overflowing |s|,
    // so it has to be checked in every build configuration (ASSERT_* checks are presumably
    // debug-only in this code base).
    TEST(szS < szBuffer, (szS, szBuffer));
    r.Read(0, s, szS);

    TEST(strcmp(value[i], s) == 0, (s));
  }
}
} // namespace
// Rewrites single sections of an existing container and checks all sections after each step.
UNIT_TEST(FilesContainer_RewriteExisting)
{
  string const fName = "files_container.tmp";
  FileWriter::DeleteFileX(fName);

  char const * key[] = {"3", "2", "1"};
  char const * value[] = {"prolog", "data", "epilog"};

  // Initial content: three sections written in the order of |key|.
  {
    FilesContainerW container(fName);
    for (size_t i = 0; i < ARRAY_SIZE(key); ++i)
    {
      auto sectionWriter = container.GetWriter(key[i]);
      sectionWriter->Write(value[i], strlen(value[i]));
    }
  }

  // Step 1: rewrite the middle section.
  char const * middle = "xxxxxxx";
  ReplaceInContainer(fName, key[1], middle);
  char const * afterStep1[] = {value[0], middle, value[2]};
  CheckContainer(fName, key, afterStep1, 3);

  // Step 2: rewrite the last section.
  char const * lastLong = "yyyyyyyyyyyyyy";
  ReplaceInContainer(fName, key[2], lastLong);
  char const * afterStep2[] = {value[0], middle, lastLong};
  CheckContainer(fName, key, afterStep2, 3);

  // Step 3: rewrite the last section once more, this time with shorter data.
  char const * lastShort = "zzz";
  ReplaceInContainer(fName, key[2], lastShort);
  char const * afterStep3[] = {value[0], middle, lastShort};
  CheckContainer(fName, key, afterStep3, 3);

  FileWriter::DeleteFileX(fName);
}
/// @todo To make this test work, need to review FilesContainerW::GetWriter logic.
/*
UNIT_TEST(FilesContainer_ConsecutiveRewriteExisting)
{
string const fName = "files_container.tmp";
FileWriter::DeleteFileX(fName);
char const * key[] = { "3", "2", "1" };
char const * value[] = { "prolog", "data", "epilog" };
// fill container
{
FilesContainerW writer(fName);
for (size_t i = 0; i < ARRAY_SIZE(key); ++i)
{
auto w = writer.GetWriter(key[i]);
w->Write(value[i], strlen(value[i]));
}
}
char const * buf0 = "xxx";
char const * buf1 = "yyy";
{
FilesContainerW writer(fName, FileWriter::OP_WRITE_EXISTING);
{
auto w = writer.GetWriter(key[0]);
w->Write(buf0, strlen(buf0));
}
{
auto w = writer.GetWriter(key[1]);
w->Write(buf1, strlen(buf1));
}
}
char const * values[] = { buf0, buf1, value[2] };
CheckContainer(fName, key, values, 3);
}
*/
// Handle::Assign must transfer the mapping: the source becomes invalid, the target valid.
UNIT_TEST(FilesMappingContainer_Handle)
{
  string const fName = "files_container.tmp";
  string const tag = "dummy";

  // A container with a single section that stores the tag text.
  {
    FilesContainerW container(fName);
    auto sectionWriter = container.GetWriter(tag);
    sectionWriter->Write(tag.c_str(), tag.size());
  }

  {
    FilesMappingContainer mapping(fName);

    FilesMappingContainer::Handle source = mapping.Map(tag);
    TEST(source.IsValid(), ());

    FilesMappingContainer::Handle target;
    TEST(!target.IsValid(), ());

    // |source| is deliberately inspected after the move.
    target.Assign(std::move(source));
    TEST(!source.IsValid(), ());
    TEST(target.IsValid(), ());
  }

  FileWriter::DeleteFileX(fName);
}
// Move-constructing a Handle must transfer the mapping, also when the handle is moved into a
// member of another object.
UNIT_TEST(FilesMappingContainer_MoveHandle)
{
  static uint8_t const kNumMapTests = 200;

  // Takes a handle by rvalue reference and checks that the moved-in member is valid.
  class HandleWrapper
  {
  public:
    explicit HandleWrapper(FilesMappingContainer::Handle && handle) : m_handle(std::move(handle))
    {
      TEST(m_handle.IsValid(), ());
    }

  private:
    FilesMappingContainer::Handle m_handle;
  };

  string const containerPath = "files_container.tmp";
  string const tagName = "dummy";

  // Removes the container file when the test scope is left.
  SCOPE_GUARD(deleteContainerFileGuard, bind(&FileWriter::DeleteFileX, cref(containerPath)));

  {
    FilesContainerW container(containerPath);
    auto sectionWriter = container.GetWriter(tagName);
    sectionWriter->Write(tagName.c_str(), tagName.size());
  }

  {
    FilesMappingContainer mapping(containerPath);

    FilesMappingContainer::Handle original = mapping.Map(tagName);
    TEST(original.IsValid(), ());

    // |original| is deliberately inspected after the move.
    FilesMappingContainer::Handle moved(std::move(original));
    TEST(moved.IsValid(), ());
    TEST(!original.IsValid(), ());

    // Map the same section repeatedly; each mapping is moved into a short-lived wrapper.
    for (int round = 0; round < kNumMapTests; ++round)
    {
      FilesMappingContainer::Handle parent_handle = mapping.Map(tagName);
      HandleWrapper tmp(std::move(parent_handle));
    }
  }
}
// Writes three sections of one million uint32_t values each and checks them through mappings.
UNIT_TEST(FilesMappingContainer_Smoke)
{
  string const fName = "files_container.tmp";
  char const * key[] = {"3", "2", "1"};
  uint32_t const count = 1000000;

  // Section number i holds the values i, i + 1, ..., i + count - 1.
  {
    FilesContainerW container(fName);
    for (size_t i = 0; i < ARRAY_SIZE(key); ++i)
    {
      auto sectionWriter = container.GetWriter(key[i]);
      for (uint32_t j = 0; j < count; ++j)
      {
        uint32_t value = j + static_cast<uint32_t>(i);
        sectionWriter->Write(&value, sizeof(value));
      }
    }
  }

  {
    FilesMappingContainer mapping(fName);
    for (size_t i = 0; i < ARRAY_SIZE(key); ++i)
    {
      FilesMappingContainer::Handle h = mapping.Map(key[i]);
      uint32_t const * values = h.GetData<uint32_t>();

      for (uint32_t j = 0; j < count; ++j)
        TEST_EQUAL(j + i, values[j], ());

      h.Unmap();
    }
  }

  FileWriter::DeleteFileX(fName);
}
// Maps sections whose sizes are one byte below, exactly at and one byte above the page size.
UNIT_TEST(FilesMappingContainer_PageSize)
{
  string const fName = "files_container.tmp";

#ifndef OMIM_OS_WINDOWS
  size_t const pageSize = sysconf(_SC_PAGESIZE);
#else
  size_t const pageSize = 4096;
#endif
  LOG(LINFO, ("Page size:", pageSize));

  char const * key[] = {"3", "2", "1"};
  // Every section is filled with this pattern, repeated cyclically.
  char const byte[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'};
  size_t count[] = {pageSize - 1, pageSize, pageSize + 1};
  size_t const sz = ARRAY_SIZE(key);

  {
    FilesContainerW container(fName);
    for (size_t i = 0; i < sz; ++i)
    {
      auto sectionWriter = container.GetWriter(key[i]);
      for (size_t j = 0; j < count[i]; ++j)
        sectionWriter->Write(&byte[j % ARRAY_SIZE(byte)], 1);
    }
  }

  {
    FilesMappingContainer mapping(fName);
    FilesMappingContainer::Handle handle[sz];

    // First map all sections, so that all mappings are alive at the same time ...
    for (size_t i = 0; i < sz; ++i)
    {
      handle[i].Assign(mapping.Map(key[i]));
      TEST_EQUAL(handle[i].GetSize(), count[i], ());
    }

    // ... then verify the content of each of them.
    for (size_t i = 0; i < sz; ++i)
    {
      char const * mapped = handle[i].GetData<char>();
      for (size_t j = 0; j < count[i]; ++j)
        TEST_EQUAL(mapped[j], byte[j % ARRAY_SIZE(byte)], ());
    }
  }

  FileWriter::DeleteFileX(fName);
}

View file

@ -0,0 +1,81 @@
#include "testing/testing.hpp"
#include "coding/fixed_bits_ddvector.hpp"
#include "coding/writer.hpp"
#include <cstdint>
#include <initializer_list>
#include <random>
#include <utility>
using namespace std;
namespace
{
// Builds a FixedBitsDDVector<Bits> from |lst| in memory, checks the counters reported by the
// builder, then loads the vector back and checks that every element reads as written.
template <size_t Bits>
void TestWithData(vector<uint32_t> const & lst)
{
  using Vector = FixedBitsDDVector<Bits, MemReader>;
  using Buffer = vector<uint8_t>;
  using BufferWriter = MemWriter<Buffer>;

  Buffer buf;
  {
    BufferWriter writer(buf);
    typename Vector::template Builder<BufferWriter> builder(writer);

    // GetCount() is expected to report (number of values below this bound, total number).
    uint32_t const optBound = (1 << Bits) - 2;
    uint32_t belowBound = 0;
    for (uint32_t const value : lst)
    {
      if (value < optBound)
        ++belowBound;
      builder.PushBack(value);
    }

    pair<uint32_t, uint32_t> expectedCount(belowBound, lst.size());
    TEST_EQUAL(builder.GetCount(), expectedCount, ());
  }

  MemReader reader(buf.data(), buf.size());
  auto const vec = Vector::Create(reader);

  uint32_t index = 0;
  for (uint32_t const written : lst)
  {
    uint32_t stored;
    TEST(vec->Get(index, stored), ());
    TEST_EQUAL(stored, written, ());
    ++index;
  }
}
} // namespace
// Three small hand-written inputs for the 3-bit variant.
UNIT_TEST(FixedBitsDDVector_Smoke)
{
  vector<uint32_t> const smallValues = {0, 3, 6};
  vector<uint32_t> const largeValues = {7, 20, 50};
  vector<uint32_t> const mixedValues = {1, 0, 4, 30, 5, 3, 6, 7, 2, 8, 0};

  TestWithData<3>(smallValues);
  TestWithData<3>(largeValues);
  TestWithData<3>(mixedValues);
}
// Runs the round-trip check on 1000 random values in [0, 1000] for bit widths 3 to 9.
UNIT_TEST(FixedBitsDDVector_Rand)
{
  size_t constexpr kMaxCount = 1000;

  default_random_engine gen;
  uniform_int_distribution<uint32_t> distribution(0, 1000);

  vector<uint32_t> values;
  for (size_t n = 0; n != kMaxCount; ++n)
    values.push_back(distribution(gen));

  TestWithData<3>(values);
  TestWithData<4>(values);
  TestWithData<5>(values);
  TestWithData<6>(values);
  TestWithData<7>(values);
  TestWithData<8>(values);
  TestWithData<9>(values);
}

View file

@ -0,0 +1,196 @@
#include "testing/testing.hpp"
#include "coding/byte_stream.hpp"
#include "coding/coding_tests/test_polylines.hpp"
#include "coding/geometry_coding.hpp"
#include "coding/point_coding.hpp"
#include "coding/varint.hpp"
#include "coding/writer.hpp"
#include "geometry/geometry_tests/large_polygon.hpp"
#include "geometry/mercator.hpp"
#include "geometry/parametrized_segment.hpp"
#include "geometry/simplification.hpp"
#include "base/logging.hpp"
#include <cstddef>
#include <cstdint>
#include <vector>
using namespace coding;
using namespace std;
// Short aliases for the two point types; used by the prediction tests in this file.
using PU = m2::PointU;
using PD = m2::PointD;
namespace
{
// Converts |p| to a PointU with PointDToPointU, using kPointCoordBits as the coordinate width.
m2::PointU D2U(m2::PointD const & p)
{
  m2::PointU const converted = PointDToPointU(p, kPointCoordBits);
  return converted;
}
// Returns the PointU that corresponds to the (kMaxX, kMaxY) corner of the mercator bounds.
m2::PointU GetMaxPoint()
{
  m2::PointD const maxCorner(mercator::Bounds::kMaxX, mercator::Bounds::kMaxY);
  return D2U(maxCorner);
}
// Encodes |points| with |fnEncode|, decodes the deltas with |fnDecode| and checks that the
// round trip reproduces |points|. For inputs of more than 10 points the size of the
// varint-serialized deltas is logged under |testName|. Empty input is a no-op.
void TestPolylineEncode(string testName, vector<m2::PointU> const & points, m2::PointU const & maxPoint,
                        void (*fnEncode)(InPointsT const & points, m2::PointU const & basePoint,
                                         m2::PointU const & maxPoint, OutDeltasT & deltas),
                        void (*fnDecode)(InDeltasT const & deltas, m2::PointU const & basePoint,
                                         m2::PointU const & maxPoint, OutPointsT & points))
{
  size_t const count = points.size();
  if (count == 0)
    return;

  m2::PointU const basePoint = m2::PointU::Zero();

  // Encode.
  vector<uint64_t> deltas;
  deltas.resize(count);
  OutDeltasT deltasOut(deltas);
  fnEncode(make_read_adapter(points), basePoint, maxPoint, deltasOut);

  // Decode.
  vector<m2::PointU> decodedPoints;
  decodedPoints.resize(count);
  OutPointsT decodedOut(decodedPoints);
  fnDecode(make_read_adapter(deltas), basePoint, maxPoint, decodedOut);

  TEST_EQUAL(points, decodedPoints, ());

  if (points.size() <= 10)
    return;

  // Informational only: how many bytes the deltas take as varints.
  vector<char> serialized;
  MemWriter<vector<char>> writer(serialized);
  for (size_t k = 0; k != deltas.size(); ++k)
    WriteVarUint(writer, deltas[k]);
  LOG(LINFO, (testName, points.size(), serialized.size()));
}
// Returns the result of SimplifyDefault applied to |points| with the tolerance |eps|.
vector<m2::PointU> SimplifyPoints(vector<m2::PointU> const & points, double eps)
{
  vector<m2::PointU> simplified;
  SimplifyDefault(points.begin(), points.end(), eps, simplified);
  return simplified;
}
// Runs the encode/decode round trip with each of the three Prev1/Prev2/Prev3 coder pairs;
// the log label is |name| followed by the coder number.
void TestEncodePolyline(string name, m2::PointU maxPoint, vector<m2::PointU> const & points)
{
  string const label1 = name + "1";
  TestPolylineEncode(label1, points, maxPoint, &EncodePolylinePrev1, &DecodePolylinePrev1);

  string const label2 = name + "2";
  TestPolylineEncode(label2, points, maxPoint, &EncodePolylinePrev2, &DecodePolylinePrev2);

  string const label3 = name + "3";
  TestPolylineEncode(label3, points, maxPoint, &EncodePolylinePrev3, &DecodePolylinePrev3);
}
} // namespace
// For every offset in [-100, 100]^2 from the point (100, 100): the delta must decode back to
// the original point, and its varint encoding must have the expected length.
UNIT_TEST(EncodePointDeltaAsUint)
{
  PU const pred = PU(100, 100);

  for (int x = -100; x <= 100; ++x)
  {
    for (int y = -100; y <= 100; ++y)
    {
      PU const orig = PU(100 + x, 100 + y);
      TEST_EQUAL(orig, DecodePointDeltaFromUint(EncodePointDeltaAsUint(orig, pred), pred), ());

      vector<char> data;
      PushBackByteSink<vector<char>> sink(data);
      WriteVarUint(sink, EncodePointDeltaAsUint(orig, pred));

      // One byte for the smallest deltas, two for medium ones, three for the rest.
      bool const needsThreeBytes = x >= 64 || x < -64 || y >= 64 || y < -64;
      bool const needsTwoBytes = x >= 8 || x < -8 || y >= 4 || y < -4;
      size_t const expectedSize = needsThreeBytes ? 3 : (needsTwoBytes ? 2 : 1);

      TEST_EQUAL(data.size(), expectedSize, (x, y));
    }
  }
}
// Checks the two-point polyline predictor on a plain case and on cases where the prediction
// is clamped by the first argument (the maximum point) or by zero.
UNIT_TEST(PredictPointsInPolyline2)
{
// Ci = Ci-1 + (Ci-1 - Ci-2) / 2
// (the expected (5, 5) matches this for Ci-1 = (4, 4), Ci-2 = (1, 2), presumably with the
// fractional part of 5.5 dropped; a '+' inside the parentheses would not give (5, 5))
TEST_EQUAL(PU(5, 5), PredictPointInPolyline(PD(8, 7), PU(4, 4), PU(1, 2)), ());
// Clamp max
TEST_EQUAL(PU(4, 4), PredictPointInPolyline(PD(4, 4), PU(4, 4), PU(1, 2)), ());
TEST_EQUAL(PU(5, 5), PredictPointInPolyline(PD(8, 7), PU(4, 4), PU(1, 2)), ());
TEST_EQUAL(PU(5, 5), PredictPointInPolyline(PD(5, 5), PU(4, 4), PU(1, 2)), ());
// Clamp 0
TEST_EQUAL(PU(4, 0), PredictPointInPolyline(PD(5, 5), PU(4, 1), PU(4, 4)), ());
}
// Checks the three-point triangle predictor on a plain case and on both clamping cases.
UNIT_TEST(PredictPointsInTriangle)
{
  // Ci = Ci-1 + Ci-2 - Ci-3
  PU const plain = PredictPointInTriangle(PD(100, 100), PU(1, 0), PU(0, 1), PU(0, 0));
  TEST_EQUAL(PU(1, 1), plain, ());

  // Clamp 0
  PU const clampedToZero = PredictPointInTriangle(PD(100, 100), PU(1, 0), PU(0, 1), PU(5, 5));
  TEST_EQUAL(PU(0, 0), clampedToZero, ());

  // Clamp max
  PU const clampedToMax = PredictPointInTriangle(PD(10, 10), PU(8, 7), PU(6, 5), PU(1, 1));
  TEST_EQUAL(PU(10, 10), clampedToMax, ());
}
/*
UNIT_TEST(PredictPointsInPolyline3_Square)
{
TEST_EQUAL(PU(5, 1), PredictPointInPolyline(PU(6, 6), PU(5, 4), PU(2, 4), PU(2, 1)), ());
TEST_EQUAL(PU(5, 3), PredictPointInPolyline(PU(6, 6), PU(4, 1), PU(2, 2), PU(3, 4)), ());
}
UNIT_TEST(PredictPointsInPolyline3_SquareClamp0)
{
TEST_EQUAL(PU(5, 1), PredictPointInPolyline(PU(6, 6), PU(5, 4), PU(2, 4), PU(2, 1)), ());
TEST_EQUAL(PU(4, 0), PredictPointInPolyline(PU(6, 6), PU(2, 0), PU(3, 2), PU(5, 1)), ());
}
UNIT_TEST(PredictPointsInPolyline3_90deg)
{
TEST_EQUAL(PU(3, 2), PredictPointInPolyline(PU(8, 8), PU(3, 6), PU(1, 6), PU(1, 5)), ());
}
*/
// Round-trips prefixes of LargePolygon::kLargePolygon (0 to 4 points and the whole polygon),
// both unsimplified and simplified with a range of tolerances.
UNIT_TEST(EncodePolyline)
{
  size_t const kSizes[] = {0, 1, 2, 3, 4, ARRAY_SIZE(LargePolygon::kLargePolygon)};
  m2::PointU const maxPoint(1000000000, 1000000000);

  // Log label and simplification tolerance of every simplified run, in execution order.
  struct Simplification
  {
    char const * m_label;
    double m_eps;
  };
  Simplification const kSimplifications[] = {{"1simp", 1},     {"2simp", 2},       {"4simp", 4},
                                             {"10simp", 10},   {"100simp", 100},   {"500simp", 500},
                                             {"1000simp", 1000}, {"2000simp", 2000}, {"4000simp", 4000}};

  for (size_t const polygonSize : kSizes)
  {
    vector<m2::PointU> points;
    points.reserve(polygonSize);
    for (size_t i = 0; i < polygonSize; ++i)
    {
      auto const & src = LargePolygon::kLargePolygon[i];
      points.push_back(m2::PointU(static_cast<uint32_t>(src.x * 10000),
                                  static_cast<uint32_t>((src.y + 200) * 10000)));
    }

    TestEncodePolyline("Unsimp", maxPoint, points);
    for (Simplification const & s : kSimplifications)
      TestEncodePolyline(s.m_label, maxPoint, SimplifyPoints(points, s.m_eps));
  }
}
// see 476c1d1d125f0c2deb8c commit for special decode test
// Round-trips the points of geometry_coding_tests::arr1 through EncodePolyline/DecodePolyline.
UNIT_TEST(DecodeEncodePolyline_DataSet1)
{
  size_t const count = ARRAY_SIZE(geometry_coding_tests::arr1);

  vector<m2::PointU> points;
  points.reserve(count);
  for (size_t idx = 0; idx != count; ++idx)
  {
    m2::PointU const converted = D2U(geometry_coding_tests::arr1[idx]);
    points.push_back(converted);
  }

  TestPolylineEncode("DataSet1", points, GetMaxPoint(), &EncodePolyline, &DecodePolyline);
}

View file

@ -0,0 +1,65 @@
#include "testing/testing.hpp"
#include "coding/byte_stream.hpp"
#include "coding/coding_tests/test_polylines.hpp"
#include "coding/geometry_coding.hpp"
#include "coding/reader.hpp"
#include "geometry/mercator.hpp"
#include "base/logging.hpp"
#include "base/math.hpp"
#include <vector>
using namespace std;
// Copy-Paste from generator/feature_builder.cpp
namespace
{
// Scalars: equal when AlmostEqualAbs accepts them with the tolerance kMwmPointAccuracy.
bool IsEqual(double d1, double d2)
{
  bool const same = AlmostEqualAbs(d1, d2, kMwmPointAccuracy);
  return same;
}

// Points: equal when EqualDxDy accepts them with the tolerance kMwmPointAccuracy.
bool IsEqual(m2::PointD const & p1, m2::PointD const & p2)
{
  bool const same = p1.EqualDxDy(p2, kMwmPointAccuracy);
  return same;
}

// Rectangles: equal when all four borders are equal as scalars.
bool IsEqual(m2::RectD const & r1, m2::RectD const & r2)
{
  if (!IsEqual(r1.minX(), r2.minX()))
    return false;
  if (!IsEqual(r1.minY(), r2.minY()))
    return false;
  if (!IsEqual(r1.maxX(), r2.maxX()))
    return false;
  return IsEqual(r1.maxY(), r2.maxY());
}
} // namespace
// Saves the points of arr1 with serial::SaveOuterPath, loads them back with
// serial::LoadOuterPath and checks that the points and their bounding rects agree within
// the IsEqual tolerance.
UNIT_TEST(SaveLoadPolyline_DataSet1)
{
  using namespace geometry_coding_tests;

  vector<m2::PointD> original(arr1, arr1 + ARRAY_SIZE(arr1));

  // Save.
  vector<char> buffer;
  PushBackByteSink<vector<char>> sink(buffer);
  serial::GeometryCodingParams cp;
  serial::SaveOuterPath(original, cp, sink);

  // Load.
  vector<m2::PointD> restored;
  ArrayByteSource source(&buffer[0]);
  serial::LoadOuterPath(source, cp, restored);

  TEST_EQUAL(original.size(), restored.size(), ());

  m2::RectD originalRect, restoredRect;
  for (size_t idx = 0; idx < original.size(); ++idx)
  {
    originalRect.Add(original[idx]);
    restoredRect.Add(restored[idx]);

    TEST(IsEqual(original[idx], restored[idx]), (original[idx], restored[idx]));
  }

  TEST(IsEqual(originalRect, restoredRect), (originalRect, restoredRect));
}

View file

@ -0,0 +1,51 @@
#include "testing/testing.hpp"
#include "coding/hex.hpp"
#include <cstddef>
#include <cstdint>
#include <random>
#include <string>
using namespace std;
// A fixed 8-byte string and its known hex form must convert into each other.
UNIT_TEST(GoldenRecode)
{
  string const data("\x01\x23\x45\x67\x89\xAB\xCD\xEF");
  string const hexData("0123456789ABCDEF");

  // Encoding direction.
  TEST_EQUAL(ToHex(data), hexData, ());
  // Decoding direction.
  TEST_EQUAL(data, FromHex(hexData), ());
}
// 256 pseudo-random strings (fixed seed) of 1 to 20 upper-case letters must survive a
// ToHex/FromHex round trip.
UNIT_TEST(RandomRecode)
{
  mt19937 rng(0);
  for (size_t round = 0; round < 256; ++round)
  {
    // The length is drawn first, then one letter per position.
    string data(1 + (rng() % 20), 0);
    for (char & ch : data)
      ch = static_cast<char>(rng() % 26) + 'A';

    TEST_EQUAL(data, FromHex(ToHex(data)), ());
  }
}
// NumToHex of a 64-bit number yields all sixteen hex digits, most significant first.
UNIT_TEST(EncodeNumber)
{
  uint64_t const number = uint64_t(0x0123456789ABCDEFULL);
  TEST_EQUAL(NumToHex(number), "0123456789ABCDEF", ());
}
// FromHex must accept lower-case hex digits.
UNIT_TEST(DecodeLowerCaseHex)
{
  auto const decoded = FromHex("fe");
  TEST_EQUAL(decoded, "\xfe", ());
}
// Encoding an empty string yields an empty string.
UNIT_TEST(EncodeEmptyString)
{
  auto const encoded = ToHex(string());
  TEST_EQUAL(encoded, "", ());
}
// Decoding an empty string yields an empty string.
UNIT_TEST(DecodeEmptyString)
{
  auto const decoded = FromHex("");
  TEST_EQUAL(decoded, "", ());
}

View file

@ -0,0 +1,133 @@
#include "testing/testing.hpp"
#include "coding/huffman.hpp"
#include "coding/reader.hpp"
#include "coding/writer.hpp"
#include "base/string_utils.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
using namespace std;
namespace
{
// Converts every string of |v| with strings::MakeUniString, keeping the order.
vector<strings::UniString> MakeUniStringVector(vector<string> const & v)
{
  vector<strings::UniString> converted;
  converted.reserve(v.size());
  for (string const & s : v)
    converted.push_back(strings::MakeUniString(s));
  return converted;
}
// Checks that |h| decodes the code made of |bits| and |len| to the symbol |expected|.
void TestDecode(coding::HuffmanCoder const & h, uint32_t bits, uint32_t len, uint32_t expected)
{
  coding::HuffmanCoder::Code code(bits, len);

  uint32_t symbol;
  bool const decoded = h.Decode(code, symbol);
  TEST(decoded, ("Could not decode", code.bits, "( length", code.len, ")"));
  TEST_EQUAL(expected, symbol, ());
}
} // namespace
namespace coding
{
// Three symbols: 'a' occurs twice and gets a 1-bit code, 'b' and 'c' get 2-bit codes.
UNIT_TEST(Huffman_Smoke)
{
  vector<string> const corpus{"ab", "ac"};

  HuffmanCoder h;
  h.Init(MakeUniStringVector(corpus));

  TestDecode(h, 0, 1, static_cast<uint32_t>('a'));  // 0
  TestDecode(h, 1, 2, static_cast<uint32_t>('b'));  // 10
  TestDecode(h, 3, 2, static_cast<uint32_t>('c'));  // 11
}
// A corpus with a single distinct symbol (five zero characters): the zero-length code must
// decode to that symbol.
UNIT_TEST(Huffman_OneSymbol)
{
  vector<string> const corpus{string(5, 0)};

  HuffmanCoder h;
  h.Init(MakeUniStringVector(corpus));

  TestDecode(h, 0, 0, 0);
}
// The coder must work with symbols outside the ASCII range.
UNIT_TEST(Huffman_NonAscii)
{
  string const data = "2πΩ";
  strings::UniString const uniData = strings::MakeUniString(data);

  HuffmanCoder h;
  h.Init(vector<strings::UniString>{uniData});

  uint32_t const first = static_cast<uint32_t>(uniData[0]);
  uint32_t const second = static_cast<uint32_t>(uniData[1]);
  uint32_t const third = static_cast<uint32_t>(uniData[2]);

  TestDecode(h, 0, 2, first);   // 00
  TestDecode(h, 1, 1, second);  // 1
  TestDecode(h, 2, 2, third);   // 01
}
// Decodes a hand-built buffer with a two-symbol coder.
UNIT_TEST(Huffman_Init)
{
  HuffmanCoder h;
  h.Init(MakeUniStringVector(vector<string>{"ab"}));

  vector<uint8_t> const buf = {
      16,   // size
      105,  // 01101001
      150,  // 10010110
  };

  MemReader memReader(&buf[0], buf.size());
  ReaderSource<MemReader> reader(memReader);

  strings::UniString const received = h.ReadAndDecode(reader);
  strings::UniString const expected = strings::MakeUniString("baababbaabbabaab");
  TEST_EQUAL(expected, received, ());
}
// A code table written with WriteEncoding and loaded with ReadEncoding must decode exactly
// like the coder it was written from.
UNIT_TEST(Huffman_Serialization_Encoding)
{
  HuffmanCoder hW;
  hW.Init(MakeUniStringVector(vector<string>{"aaaaaaaaaa", "bbbbbbbbbb", "ccccc", "ddddd"}));  // 10, 10, 5, 5

  vector<uint8_t> buf;
  MemWriter<vector<uint8_t>> writer(buf);
  hW.WriteEncoding(writer);

  HuffmanCoder hR;
  MemReader memReader(&buf[0], buf.size());
  ReaderSource<MemReader> reader(memReader);
  hR.ReadEncoding(reader);

  // Reading must consume exactly what was written.
  TEST_EQUAL(reader.Pos(), writer.Pos(), ());

  // Every code is two bits long: a = 00, b = 01, c = 10, d = 11.
  struct Expectation
  {
    uint32_t m_bits;
    char m_symbol;
  };
  Expectation const kCodes[] = {{0, 'a'}, {2, 'b'}, {1, 'c'}, {3, 'd'}};

  // First the original coder, then the deserialized one.
  for (HuffmanCoder const * coder : {&hW, &hR})
  {
    for (Expectation const & e : kCodes)
      TestDecode(*coder, e.m_bits, 2, static_cast<uint32_t>(e.m_symbol));
  }
}
// Writes the code table followed by an encoded string; a fresh coder that reads both must
// decode the same string.
UNIT_TEST(Huffman_Serialization_Data)
{
  HuffmanCoder hW;
  hW.Init(MakeUniStringVector(vector<string>{"aaaaaaaaaa", "bbbbbbbbbb", "ccccc", "ddddd"}));  // 10, 10, 5, 5

  string const data = "abacabaddddaaabbcabacabadbabd";
  strings::UniString const expected = strings::UniString(data.begin(), data.end());

  // Write side.
  vector<uint8_t> buf;
  MemWriter<vector<uint8_t>> writer(buf);
  hW.WriteEncoding(writer);
  hW.EncodeAndWrite(writer, expected);

  // Read side.
  HuffmanCoder hR;
  MemReader memReader(&buf[0], buf.size());
  ReaderSource<MemReader> reader(memReader);
  hR.ReadEncoding(reader);
  strings::UniString const received = hR.ReadAndDecode(reader);

  TEST_EQUAL(expected, received, ());
}
} // namespace coding

View file

@ -0,0 +1,108 @@
#include "testing/testing.hpp"
#include "coding/map_uint32_to_val.hpp"
#include "coding/reader.hpp"
#include "coding/varint.hpp"
#include "coding/writer.hpp"
#include <utility>
#include <vector>
namespace map_uint32_tests
{
using namespace std;
// Byte buffer the tests serialize the map into and load it back from.
using BufferT = vector<uint8_t>;
// Container filled by the read callbacks passed to Load.
using ValuesT = vector<uint32_t>;
// Builder and map instantiated for uint32_t values.
using BuilderT = MapUint32ToValueBuilder<uint32_t>;
using MapT = MapUint32ToValue<uint32_t>;
// Edge cases: an empty map and a map with a single entry.
UNIT_TEST(MapUint32Val_Small)
{
  // Empty map: nothing to write, nothing to read.
  {
    BuilderT builder;
    BufferT buffer;
    MemWriter writer(buffer);
    builder.Freeze(writer, [](Writer &, auto, auto) {});

    LOG(LINFO, ("Empty map size =", buffer.size()));

    MemReader reader(buffer.data(), buffer.size());
    auto map = MapT::Load(reader, [](NonOwningReaderSource &, uint32_t, ValuesT &) {});

    TEST_EQUAL(map->Count(), 0, ());
    uint32_t unused;
    TEST(!map->Get(1, unused), ());
  }

  // Single entry: key 1 maps to 777.
  {
    BuilderT builder;
    builder.Put(1, 777);

    BufferT buffer;
    MemWriter writer(buffer);
    builder.Freeze(writer, [](Writer & sink, auto first, auto last)
    {
      // The write callback must receive exactly one value.
      WriteVarUint(sink, *first++);
      TEST(first == last, ());
    });

    MemReader reader(buffer.data(), buffer.size());
    auto map = MapT::Load(reader, [](NonOwningReaderSource & src, uint32_t blockSize, ValuesT & out)
    {
      TEST_EQUAL(blockSize, 1, ("GetThreadsafe should pass optimal blockSize"));
      while (src.Size() > 0)
        out.push_back(ReadVarUint<uint32_t>(src));
      TEST_EQUAL(out.size(), 1, ());
    });

    TEST_EQUAL(map->Count(), 1, ());
    uint32_t stored;
    TEST(map->GetThreadsafe(1, stored), ());
    TEST_EQUAL(stored, 777, ());
  }
}
// Stores the identity mapping for the keys 0 .. 226 and reads every key back with both
// Get and GetThreadsafe.
UNIT_TEST(MapUint32Val_Smoke)
{
  size_t const dataSize = 227;

  vector<pair<uint32_t, uint32_t>> data;
  data.reserve(dataSize);
  for (size_t i = 0; i < dataSize; ++i)
    data.emplace_back(static_cast<uint32_t>(i), static_cast<uint32_t>(i));

  BufferT buffer;

  // Build and serialize; every value is written as a fixed-size primitive.
  {
    BuilderT builder;
    for (auto const & entry : data)
      builder.Put(entry.first, entry.second);

    MemWriter writer(buffer);
    builder.Freeze(writer, [](Writer & sink, BuilderT::Iter first, BuilderT::Iter last)
    {
      for (auto cur = first; cur != last; ++cur)
        WriteToSink(sink, *cur);
    });
  }

  // Load and look every key up.
  {
    MemReader reader(buffer.data(), buffer.size());
    auto table = MapUint32ToValue<uint32_t>::Load(
        reader, [](NonOwningReaderSource & src, uint32_t blockSize, ValuesT & out)
    {
      out.reserve(blockSize);
      while (src.Size() > 0)
        out.push_back(ReadPrimitiveFromSource<uint32_t>(src));
    });
    TEST(table.get(), ());

    for (auto const & entry : data)
    {
      uint32_t stored;
      TEST(table->Get(entry.first, stored), ());
      TEST_EQUAL(stored, entry.second, ());
      TEST(table->GetThreadsafe(entry.first, stored), ());
      TEST_EQUAL(stored, entry.second, ());
    }
  }
}
} // namespace map_uint32_tests

View file

@ -0,0 +1,48 @@
#include "testing/testing.hpp"
#include "base/macros.hpp"
#include "coding/reader.hpp"
// Reads from every offset of a MemReader (and of a sub-reader cut out of it)
// and checks that only the requested bytes are written into the destination
// while the remaining bytes keep the 0xff filler.
UNIT_TEST(MemReaderSimple)
{
  char constexpr data[] = "123";
  size_t constexpr n = ARRAY_SIZE(data);

  MemReader const memReader(data, n);
  for (size_t offset = 0; offset < n; ++offset)
  {
    uint8_t dst[4] = {0xff, 0xff, 0xff, 0xff};
    ReadFromPos(memReader, offset, dst, n - offset);
    for (size_t k = 0; k < n; ++k)
    {
      bool const wasRead = offset + k < n;
      TEST_EQUAL(dst[k], wasRead ? data[offset + k] : uint8_t(0xff), (offset, k, n));
    }
  }

  // The sub-reader skips the first byte and exposes n - 2 bytes.
  MemReader const subReader = memReader.SubReader(1, n - 2);
  for (size_t offset = 1; offset < n - 1; ++offset)
  {
    uint8_t dst[4] = {0xff, 0xff, 0xff, 0xff};
    ReadFromPos(subReader, offset, dst, n - offset - 2);
    for (size_t k = 0; k < n; ++k)
    {
      bool const wasRead = offset + k < n - 2;
      TEST_EQUAL(dst[k], wasRead ? data[offset + k + 1] : uint8_t(0xff), (offset, k, n));
    }
  }
}
// Same scenario as MemReaderSimple, but the reader is constructed directly
// from a std::string_view and the destination is zero-filled.
UNIT_TEST(MemReaderStringView)
{
  std::string_view constexpr data = "1234567";
  size_t constexpr n = data.size();

  MemReader const memReader(data);
  for (size_t offset = 0; offset < n; ++offset)
  {
    uint8_t dst[n] = {0};
    ReadFromPos(memReader, offset, dst, n - offset);
    for (size_t k = 0; k < n; ++k)
    {
      bool const wasRead = offset + k < n;
      TEST_EQUAL(dst[k], wasRead ? data[offset + k] : uint8_t{0}, (offset, k, n));
    }
  }

  // The sub-reader skips the first byte and exposes n - 2 bytes.
  MemReader const subReader = memReader.SubReader(1, n - 2);
  for (size_t offset = 1; offset < n - 1; ++offset)
  {
    uint8_t dst[n] = {0};
    ReadFromPos(subReader, offset, dst, n - offset - 2);
    for (size_t k = 0; k < n; ++k)
    {
      bool const wasRead = offset + k < n - 2;
      TEST_EQUAL(dst[k], wasRead ? data[offset + k + 1] : uint8_t{0}, (offset, k, n));
    }
  }
}

View file

@ -0,0 +1,30 @@
#include "testing/testing.hpp"
#include "coding/file_writer.hpp"
#include "coding/writer.hpp"
#include "base/macros.hpp"
#include <vector>
// Constructing and destroying a MemWriter without writing anything must leave
// the underlying container empty.
UNIT_TEST(MemWriterEmpty)
{
  using Storage = std::vector<char>;

  Storage data;
  {
    MemWriter<Storage> writer(data);
  }
  TEST(data.empty(), (data));
}
// Consecutive writes must be appended to the underlying container one after
// another, producing exactly the concatenated bytes.
UNIT_TEST(MemWriterSimple)
{
  std::vector<char> data;
  MemWriter<std::vector<char>> writer(data);
  writer.Write("Hello", 5);
  writer.Write(",", 1);
  writer.Write("world!", 6);

  char const expected[] = "Hello,world!";
  // The terminating zero of |expected| is not part of the written data.
  TEST_EQUAL(data.size(), ARRAY_SIZE(expected) - 1, ());
  // Qualified call: this file has no using-directive for std, so an unqualified
  // equal() would only resolve through ADL on the vector iterator type.
  TEST(std::equal(data.begin(), data.end(), &expected[0]), (data));
}

View file

@ -0,0 +1,32 @@
#include "testing/testing.hpp"
#include "coding/move_to_front.hpp"
#include <cstdint>
using namespace coding;
namespace
{
// Checks the initial identity order of MoveToFront and the order after the
// symbol 3 has been transformed several times.
UNIT_TEST(MoveToFront_Smoke)
{
  MoveToFront mtf;

  for (size_t pos = 0; pos < 256; ++pos)
    TEST_EQUAL(mtf[pos], pos, ());

  // Initially 3 sits at position 3.
  TEST_EQUAL(mtf.Transform(3), 3, ());

  // Once transformed, 3 is at the front and stays there on repeated transforms.
  for (size_t attempt = 0; attempt < 3; ++attempt)
    TEST_EQUAL(mtf.Transform(3), 0, ());

  // The symbols that were in front of 3 are shifted by one position.
  size_t const kFront[] = {3, 0, 1, 2};
  for (size_t pos = 0; pos < 4; ++pos)
    TEST_EQUAL(mtf[pos], kFront[pos], ());

  // Everything behind the former position of 3 is untouched.
  for (size_t pos = 4; pos < 256; ++pos)
    TEST_EQUAL(mtf[pos], pos, ());
}
}  // namespace

View file

@ -0,0 +1,45 @@
#include "testing/testing.hpp"
#include <fstream>
#include <string>
#include <vector>
using namespace std;
// Loads the whole content of the file |filename| into |buffer|.
//
// |buffer| is always overwritten: after the call it holds exactly the bytes
// that were read. It is left empty when the file cannot be opened, is empty,
// or its size cannot be determined.
void loadFile(std::vector<unsigned char> & buffer, std::string const & filename)
{
  buffer.clear();

  // Open positioned at the end, so the current offset is the file size.
  std::ifstream file(filename, std::ios::in | std::ios::binary | std::ios::ate);
  if (!file)
    return;

  std::streamoff const size = file.tellg();
  if (size <= 0 || !file.seekg(0, std::ios::beg))
    return;

  buffer.resize(static_cast<std::size_t>(size));
  file.read(reinterpret_cast<char *>(buffer.data()), static_cast<std::streamsize>(size));

  // Keep only the bytes that were really read: a short or failed read must not
  // leave zero-filled padding that looks like file content.
  buffer.resize(static_cast<std::size_t>(file.gcount()));
}
// Placeholder test: the whole body below is commented out, so the test
// currently executes no statements and performs no checks.
// The disabled code loads "../../data/font_0.png" with loadFile(), decodes it
// with DecodePNG() and expects a non-empty 1024x1024 image.
UNIT_TEST(PngDecode)
{
// // load and decode
// vector<unsigned char> buffer, image;
// loadFile(buffer, "../../data/font_0.png");
// unsigned long w, h;
// int error = DecodePNG(image, w, h, buffer.empty() ? 0 : &buffer[0], (unsigned long)buffer.size());
//
// // if there's an error, display it
// TEST_EQUAL(error, 0, ());
// // the pixels are now in the vector "image", use it as texture, draw it, ...
// TEST_GREATER(image.size(), 4, ("Image is empty???"));
// TEST_EQUAL(w, 1024, ());
// TEST_EQUAL(h, 1024, ());
}

View file

@ -0,0 +1,200 @@
#include "testing/testing.hpp"
#include "coding/coding_tests/test_polylines.hpp"
#include "coding/point_coding.hpp"
#include "geometry/mercator.hpp"
#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
#include "base/logging.hpp"
#include "base/math.hpp"
#include <cmath>
#include <random>
using namespace std;
namespace
{
// Accuracy and bit depth shared by all tests of this file.
double const kEps = kMwmPointAccuracy;
uint8_t const kCoordBits = kPointCoordBits;
// 2^30, used to build large interleaved coordinates.
uint32_t const kBig = uint32_t{1} << 30;

// Checks that |p1| and |p2| differ by no more than kEps along each axis and
// that both of them lie inside the [-180, 180] x [-180, 180] square.
void CheckEqualPoints(m2::PointD const & p1, m2::PointD const & p2)
{
  TEST(p1.EqualDxDy(p2, kEps), (p1, p2));

  auto const checkBounds = [](m2::PointD const & p)
  {
    TEST_GREATER_OR_EQUAL(p.x, -180.0, ());
    TEST_GREATER_OR_EQUAL(p.y, -180.0, ());
    TEST_LESS_OR_EQUAL(p.x, 180.0, ());
    TEST_LESS_OR_EQUAL(p.y, 180.0, ());
  };

  checkBounds(p1);
  checkBounds(p2);
}
}  // namespace
// Informational test: finds (by decimal steps) the largest shift of the four
// corner points that no longer changes their integer representation and logs
// it together with the real-coordinate size of a one-unit integer step.
UNIT_TEST(PointDToPointU_Epsilons)
{
  m2::PointD const arrPt[] = {{-180, -180}, {-180, 180}, {180, 180}, {180, -180}};
  // Direction towards the inside of the square for the matching corner.
  m2::PointD const arrD[] = {{1, 1}, {1, -1}, {-1, -1}, {-1, 1}};
  size_t const count = ARRAY_SIZE(arrPt);

  // True when at least one corner maps to another integer point after being
  // shifted inwards by |shift|.
  auto const anyCornerMoves = [&](double shift)
  {
    for (size_t i = 0; i < count; ++i)
    {
      m2::PointU const original = PointDToPointU(arrPt[i].x, arrPt[i].y, kCoordBits);
      m2::PointU const shifted =
          PointDToPointU(arrPt[i].x + arrD[i].x * shift, arrPt[i].y + arrD[i].y * shift, kCoordBits);
      if (original != shifted)
        return true;
    }
    return false;
  };

  double eps = 1.0;
  while (anyCornerMoves(eps))
    eps *= 0.1;

  LOG(LINFO, ("Epsilon (relative error) =", eps));

  for (size_t i = 0; i < count; ++i)
  {
    m2::PointU const p1 = PointDToPointU(arrPt[i].x, arrPt[i].y, kCoordBits);
    m2::PointU const p2(p1.x + arrD[i].x, p1.y + arrD[i].y);
    m2::PointD const p3 = PointUToPointD(p2, kCoordBits);
    LOG(LINFO, ("Dx =", p3.x - arrPt[i].x, "Dy =", p3.y - arrPt[i].y));
  }
}
// For a set of limit rects of different origins and sizes checks that random
// points inside the rect survive the PointD -> PointU -> PointD round trip
// within kEps when the bit depth is chosen by GetCoordBits().
UNIT_TEST(PointDToPointU_WithLimitRect)
{
  mt19937 rng(0);

  m2::PointD const limitRectOrigin[] = {{0.0, 0.0}, {10.0, 10.0}, {90.0, 90.0}, {160.0, 160.0}};
  double const limitRectSize[] = {0.1, 1.0, 5.0, 10.0, 20.0};
  size_t const pointsPerRect = 100;

  for (auto const & origin : limitRectOrigin)
  {
    for (auto const width : limitRectSize)
    {
      for (auto const height : limitRectSize)
      {
        m2::RectD const limitRect(origin.x, origin.y, origin.x + width, origin.y + height);
        uniform_real_distribution<double> distX(limitRect.minX(), limitRect.maxX());
        uniform_real_distribution<double> distY(limitRect.minY(), limitRect.maxY());

        auto const coordBits = GetCoordBits(limitRect, kEps);
        TEST_NOT_EQUAL(coordBits, 0, ());
        // All rects in this test are more than 2 times smaller than mercator range.
        TEST_LESS(coordBits, kCoordBits, (limitRect));

        for (size_t sample = 0; sample < pointsPerRect; ++sample)
        {
          auto const pt = m2::PointD(distX(rng), distY(rng));
          auto const encoded = PointDToPointU(pt, coordBits, limitRect);
          auto const decoded = PointUToPointD(encoded, coordBits, limitRect);
          TEST(AlmostEqualAbs(pt, decoded, kEps), (limitRect, pt, decoded, coordBits, kEps));
        }
      }
    }
  }
}
// A few hand-picked points must survive the PointD -> int64 -> PointD round trip.
UNIT_TEST(PointToInt64Obsolete_Smoke)
{
  m2::PointD const arr[] = {{1.25, 1.3}, {180, 90}, {-180, -90}, {0, 0}};
  for (auto const & pt : arr)
  {
    auto const encoded = PointToInt64Obsolete(pt, kCoordBits);
    CheckEqualPoints(pt, Int64ToPointObsolete(encoded, kCoordBits));
  }
}
// Walks a regular grid over [-180, 180] x [-180, 180] and checks that the
// encoding round trip keeps the point and that re-encoding the decoded point
// gives the same id.
UNIT_TEST(PointToInt64Obsolete_Grid)
{
  int const step = 5;
  for (int x = -180; x <= 180; x += step)
  {
    for (int y = -180; y <= 180; y += step)
    {
      m2::PointD const original(x, y);
      int64_t const id = PointToInt64Obsolete(original, kCoordBits);
      m2::PointD const decoded = Int64ToPointObsolete(id, kCoordBits);
      CheckEqualPoints(original, decoded);

      int64_t const idAgain = PointToInt64Obsolete(decoded, kCoordBits);
      TEST_EQUAL(id, idAgain, (original, decoded));
    }
  }
}
// Shifts a set of reference points by small offsets (possibly leaving the
// valid square) and checks that the round trip moves each coordinate by no
// more than the applied offset plus kEps.
UNIT_TEST(PointToInt64Obsolete_Bounds)
{
  double const arrEps[] = {-1.0E-2, -1.0E-3, -1.0E-4, 0, 1.0E-4, 1.0E-3, 1.0E-2};
  m2::PointD const arrPt[] = {{0, 0},     {-180, -180}, {-180, 180}, {180, 180}, {180, -180},
                              {-90, -90}, {-90, 90},    {90, 90},    {90, -90}};

  for (auto const & anchor : arrPt)
  {
    for (double const epsX : arrEps)
    {
      for (double const epsY : arrEps)
      {
        m2::PointD const pt(anchor.x + epsX, anchor.y + epsY);
        m2::PointD const pt1 = Int64ToPointObsolete(PointToInt64Obsolete(pt, kCoordBits), kCoordBits);

        bool const xIsClose = fabs(pt.x - pt1.x) <= (fabs(epsX) + kEps);
        bool const yIsClose = fabs(pt.y - pt1.y) <= (fabs(epsY) + kEps);
        TEST(xIsClose && yIsClose, (pt, pt1));
      }
    }
  }
}
// The zero point and the zero id must map to each other.
UNIT_TEST(PointUToUint64Obsolete_0)
{
  m2::PointU const zeroPoint(0, 0);
  TEST_EQUAL(0, PointUToUint64Obsolete(zeroPoint), ());
  TEST_EQUAL(zeroPoint, Uint64ToPointUObsolete(0), ());
}
// Bits of x must land on even positions and bits of y on odd positions of the
// resulting id.
UNIT_TEST(PointUToUint64Obsolete_Interlaced)
{
  // Only y is set: all odd bits.
  TEST_EQUAL(0xAAAAAAAAAAAAAAAAULL, PointUToUint64Obsolete(m2::PointU(0, 0xFFFFFFFF)), ());
  TEST_EQUAL(0xAAAAAAAAAAAAAAA8ULL, PointUToUint64Obsolete(m2::PointU(0, 0xFFFFFFFE)), ());

  // Only x is set: all even bits.
  TEST_EQUAL(0x5555555555555555ULL, PointUToUint64Obsolete(m2::PointU(0xFFFFFFFF, 0)), ());
  TEST_EQUAL(0x5555555555555554ULL, PointUToUint64Obsolete(m2::PointU(0xFFFFFFFE, 0)), ());
}
// Single-bit coordinates, plus the values around kBig (2^30).
UNIT_TEST(PointUToUint64Obsolete_1bit)
{
  // The lowest bit of y is bit 1 of the id.
  m2::PointU const unitY(0, 1);
  TEST_EQUAL(2, PointUToUint64Obsolete(unitY), ());
  TEST_EQUAL(unitY, Uint64ToPointUObsolete(2), ());

  // The lowest bit of x is bit 0 of the id.
  m2::PointU const unitX(1, 0);
  TEST_EQUAL(1, PointUToUint64Obsolete(unitX), ());
  TEST_EQUAL(unitX, Uint64ToPointUObsolete(1), ());

  TEST_EQUAL(3ULL << 60, PointUToUint64Obsolete(m2::PointU(kBig, kBig)), ());
  TEST_EQUAL((1ULL << 60) - 1, PointUToUint64Obsolete(m2::PointU(kBig - 1, kBig - 1)), ());
}
// Runs the encode/decode round trip over every point of the arr1 data set.
UNIT_TEST(PointToInt64Obsolete_DataSet1)
{
  using namespace geometry_coding_tests;

  for (auto const & sample : arr1)
  {
    m2::PointD const original(sample.x, sample.y);
    int64_t const id = PointToInt64Obsolete(original, kCoordBits);
    m2::PointD const decoded = Int64ToPointObsolete(id, kCoordBits);
    CheckEqualPoints(original, decoded);

    // Encoding must be stable: the decoded point maps back to the same id.
    int64_t const idAgain = PointToInt64Obsolete(decoded, kCoordBits);
    TEST_EQUAL(id, idAgain, (original, decoded));
  }
}

View file

@ -0,0 +1,49 @@
#include "testing/testing.hpp"
#include "coding/reader.hpp"
#include "coding/reader_cache.hpp"
#include <algorithm>
#include <random>
#include <string>
#include <vector>
using namespace std;
namespace
{
template <class ReaderT>
class CacheReader
{
public:
CacheReader(ReaderT const & reader, uint32_t logPageSize, uint32_t logPageCount)
: m_Reader(reader)
, m_Cache(logPageSize, logPageCount)
{}
void Read(uint64_t pos, void * p, size_t size) const { m_Cache.Read(m_Reader, pos, p, size); }
private:
ReaderT m_Reader;
ReaderCache<ReaderT const> mutable m_Cache;
};
} // namespace
// Compares 100000 pseudo-random reads (fixed seed) done directly from memory
// with the same reads done through the cache.
UNIT_TEST(CacheReaderRandomTest)
{
  size_t const kReadsCount = 100000;

  vector<char> data(100000);
  for (size_t idx = 0; idx < data.size(); ++idx)
    data[idx] = static_cast<char>(idx % 253);

  MemReader memReader(data.data(), data.size());
  CacheReader<MemReader> cacheReader(MemReader(data.data(), data.size()), 10, 5);

  mt19937 rng(0);
  for (size_t attempt = 0; attempt < kReadsCount; ++attempt)
  {
    size_t const pos = rng() % data.size();
    // Length is in [1, 127] and is clipped by the end of the data.
    size_t const wanted = static_cast<size_t>(1 + (rng() % 127));
    size_t const len = min(wanted, data.size() - pos);

    string readMem(len, '0');
    string readCache(len, '0');
    memReader.Read(pos, &readMem[0], len);
    cacheReader.Read(pos, &readCache[0], len);
    TEST_EQUAL(readMem, readCache, (pos, len, attempt));
  }
}

View file

@ -0,0 +1,122 @@
#include "testing/testing.hpp"
#include "coding/coding_tests/reader_test.hpp"
#include "coding/buffer_reader.hpp"
#include "coding/file_reader.hpp"
#include "coding/file_writer.hpp"
#include "coding/reader_streambuf.hpp"
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
using namespace std;
namespace
{
// Reference content; TestReader() checks exact substrings of this text.
string const kData = "Quick brown fox jumps over a lazy dog...";
}  // namespace
// Runs the generic reader checks against an in-memory reader over kData.
UNIT_TEST(MemReaderSmokeTest)
{
  MemReader const reader(kData.data(), kData.size());
  TestReader(reader);
}
// Writes kData into a temporary file and runs the generic reader checks
// against a FileReader opened on it.
UNIT_TEST(FileReaderSmokeTest)
{
  char const * const fileName = "reader_test_tmp.dat";

  // The writer is scoped so the file is closed before it is read.
  {
    FileWriter writer(fileName);
    writer.Write(kData.data(), kData.size());
  }

  {
    FileReader fileReader(fileName);
    TestReader(fileReader);
  }

  FileWriter::DeleteFileX(fileName);
}
// Checks BufferReader built from a memory range and from a file reader with
// a non-zero offset that skips a 7-byte prefix.
UNIT_TEST(BufferReaderSmokeTest)
{
  BufferReader fromMemory(&kData[0], kData.size());
  TestReader(fromMemory);

  char const * const fileName = "reader_test_tmp.dat";
  {
    // "BlaBla " is 7 bytes long; it is skipped by the offset below.
    string const data("BlaBla " + kData);
    FileWriter writer(fileName);
    writer.Write(&data[0], data.size());
  }

  BufferReader fromFile(FileReader(fileName), 7);
  TestReader(fromFile);

  FileWriter::DeleteFileX(fileName);
}
// A BufferReader built over an empty reader must report zero size.
UNIT_TEST(BufferReaderEmptyTest)
{
  // nullptr instead of the NULL macro: type-safe and needs no extra header.
  MemReader reader(nullptr, 0);
  BufferReader bufReader(reader, 0);
  TEST_EQUAL(bufReader.Size(), 0, ());
}
// Opening a file that does not exist must throw FileReader::OpenException.
UNIT_TEST(FileReaderNonExistentFileTest)
{
  bool opened = false;
  try
  {
    FileReader reader("skjhfaxniauiuq2bmnszmn093sklsd");
    opened = true;
  }
  catch (FileReader::OpenException &)
  {
    // Expected path: nothing to do.
  }

  if (opened)
    TEST(false, ("Exception should be thrown!"));
}
// Writes the file's own name into it and checks that ReadAsString() returns
// exactly that text.
UNIT_TEST(FileReaderReadAsText)
{
  char const fName[] = "zzzuuuuuummmba";
  // The terminating zero is not written.
  size_t const textLength = ARRAY_SIZE(fName) - 1;

  {
    FileWriter writer(fName);
    writer.Write(fName, textLength);
  }

  {
    string content;
    FileReader(fName).ReadAsString(content);
    TEST_EQUAL(content, fName, ());
  }

  FileWriter::DeleteFileX(fName);
}
// Formats several values through WriterStreamBuf into a file and parses them
// back through ReaderStreamBuf.
UNIT_TEST(ReaderStreamBuf)
{
  string const name = "test.txt";

  {
    FileWriter writer(name);
    WriterStreamBuf buffer(writer);
    ostream out(&buffer);
    out << "hey!" << '\n';
    out << 1 << '\n';
    out << 3.14 << '\n';
    // endl also flushes the stream.
    out << 0x0102030405060708ull << endl;
  }

  {
    ReaderStreamBuf buffer(make_unique<FileReader>(name));
    istream in(&buffer);

    string str;
    int i;
    double d;
    unsigned long long ull;
    in >> str;
    in >> i;
    in >> d;
    in >> ull;

    TEST_EQUAL(str, "hey!", ());
    TEST_EQUAL(i, 1, ());
    TEST_ALMOST_EQUAL_ULPS(d, 3.14, ());
    TEST_EQUAL(ull, 0x0102030405060708ull, ());
  }

  FileWriter::DeleteFileX(name);
}

View file

@ -0,0 +1,51 @@
#pragma once
#include "testing/testing.hpp"
#include "coding/reader.hpp"
#include <string>
namespace
{
// Resizes |str| to |size| characters and fills it with |size| bytes taken
// from |reader| starting at offset |pos|.
template <class ReaderT>
void ReadToStringFromPos(ReaderT const & reader, std::string & str, uint64_t pos, size_t size)
{
  str.resize(size);
  char * const dst = &str[0];
  reader.Read(pos, dst, str.size());
}

// Resizes |str| to |size| characters and fills it with the next |size| bytes
// of |source|.
template <class SourceT>
void ReadToStringFromSource(SourceT & source, std::string & str, size_t size)
{
  str.resize(size);
  char * const dst = &str[0];
  source.Read(dst, str.size());
}
}  // namespace
// Generic reader check. |reader| must expose the text
// "Quick brown fox jumps over a lazy dog..."; the function reads it through
// a ReaderSource, a sub-reader, a sub-sub-reader and a source built on top of
// the sub-reader, and checks every piece that was read.
template <typename ReaderT>
void TestReader(ReaderT const & reader)
{
  ReaderSource<ReaderT> source(reader);
  std::string piece;

  // Sequential reads from the source.
  ReadToStringFromSource(source, piece, 6);
  TEST_EQUAL(piece, "Quick ", ());
  ReadToStringFromSource(source, piece, 6);
  TEST_EQUAL(piece, "brown ", ());

  // Sub-reader over the next 10 bytes of the source.
  ReaderT subReader = source.SubReader(10);
  ReadToStringFromPos(subReader, piece, 1, 3);
  TEST_EQUAL(piece, "ox ", ());

  // Sub-reader of the sub-reader.
  ReaderT subSubReader = subReader.SubReader(2, 8);
  ReadToStringFromPos(subSubReader, piece, 0, 2);
  TEST_EQUAL(piece, "x ", ());

  // The next read from the source yields the text that follows the sub-reader range.
  ReadToStringFromSource(source, piece, 5);
  TEST_EQUAL(piece, "over ", ());

  // A fresh source on top of the sub-reader starts from its beginning.
  ReaderSource<ReaderT> subReaderSource(subReader);
  ReadToStringFromSource(subReaderSource, piece, 6);
  TEST_EQUAL(piece, "fox ju", ());
}

View file

@ -0,0 +1,153 @@
#include "testing/testing.hpp"
#include "coding/byte_stream.hpp"
#include "coding/file_reader.hpp"
#include "coding/file_writer.hpp"
#include "coding/read_write_utils.hpp"
#include "coding/reader_writer_ops.hpp"
#include "base/random.hpp"
#include <algorithm>
#include <vector>
namespace rw_ops_tests
{
using namespace std;
namespace
{
// Reverses |src| into a temporary file, then reverses that file into |buffer|
// (which is cleared first). The temporary file is deleted afterwards.
void GetReverseForReaderAndTmpFile(Reader const & src, vector<char> & buffer)
{
  char const * const tmpFile = "reversed_file.tmp";

  // First pass: src -> file. The writer is scoped so the file is closed
  // before it is opened for reading.
  {
    FileWriter fileWriter(tmpFile);
    rw_ops::Reverse(src, fileWriter);
  }

  // Second pass: file -> buffer.
  {
    buffer.clear();
    FileReader fileReader(tmpFile);
    MemWriter<vector<char>> memWriter(buffer);
    rw_ops::Reverse(fileReader, memWriter);
  }

  FileWriter::DeleteFileX(tmpFile);
}
// Creates the file |fName| and writes |count| random bytes into it, one byte
// per Write() call.
void FillRandFile(string const & fName, size_t count)
{
  FileWriter writer(fName);
  base::UniformRandom<int8_t> rand;

  for (size_t written = 0; written < count; ++written)
  {
    int8_t const c = rand();
    writer.Write(&c, 1);
  }
}
} // namespace
// Reversing data twice must give the original bytes back; checked on a small
// in-memory array and on a file of random bytes.
UNIT_TEST(Reverse_Smoke)
{
  // Small in-memory input.
  {
    char arr[] = {0xA, 0xB, 0xC, 0xD, 0xF};
    size_t const sz = ARRAY_SIZE(arr);

    MemReader reader(&arr[0], sz);
    vector<char> restored;
    GetReverseForReaderAndTmpFile(reader, restored);

    TEST_EQUAL(restored.size(), ARRAY_SIZE(arr), ());
    TEST(equal(arr, arr + ARRAY_SIZE(arr), restored.begin()), ());
  }

  // Random file whose size is not a round number.
  {
    char const * const tmpFile = "random_file.tmp";
    {
      FillRandFile(tmpFile, 10 * 1024 + 527);

      FileReader reader(tmpFile);
      vector<char> restored;
      GetReverseForReaderAndTmpFile(reader, restored);

      string original;
      reader.ReadAsString(original);
      TEST_EQUAL(original.size(), restored.size(), ());
      TEST(equal(original.begin(), original.end(), restored.begin()), ());
    }
    FileWriter::DeleteFileX(tmpFile);
  }
}
namespace
{
// Plain structure used to test reading and writing of vectors of PODs.
struct ThePOD
{
  uint32_t m_i;
  double m_d;
};

// Two PODs are equal when both of their fields are equal.
bool operator==(ThePOD const & r1, ThePOD const & r2)
{
  if (r1.m_i != r2.m_i)
    return false;
  return r1.m_d == r2.m_d;
}
}  // namespace
// Writes a vector of random PODs into a byte buffer and reads it back into a
// buffer_vector; both sequences must be equal.
UNIT_TEST(ReadWrite_POD)
{
  base::UniformRandom<uint32_t> rand;

  size_t const count = 1000;
  vector<ThePOD> src(count);
  for (auto & pod : src)
  {
    pod.m_i = rand();
    pod.m_d = double(rand()) / double(rand());
  }

  vector<char> buffer;
  PushBackByteSink<vector<char>> sink(buffer);
  rw::WriteVectorOfPOD(sink, src);

  buffer_vector<ThePOD, 128> dest;
  ArrayByteSource byteSrc(buffer.data());
  rw::ReadVectorOfPOD(byteSrc, dest);

  // The sizes are checked first: equal() below walks |dest| for the whole
  // length of |src| and would run past the end of a shorter |dest|.
  TEST_EQUAL(src.size(), dest.size(), ());
  TEST(equal(src.begin(), src.end(), dest.begin()), ());
}
namespace
{
template <class T>
void TestIntegral()
{
std::vector<T> ethalon{static_cast<T>(-1), 0, 1, static_cast<T>(-2), 2, std::numeric_limits<T>::min(),
std::numeric_limits<T>::max()};
std::string buffer;
MemWriter writer(buffer);
rw::Write(writer, ethalon);
std::vector<T> expected;
MemReader reader(buffer);
ReaderSource src(reader);
rw::Read(src, expected);
TEST_EQUAL(ethalon, expected, ());
}
} // namespace
// Runs the write/read round trip for 32- and 64-bit signed and unsigned integers.
UNIT_TEST(ReadWrite_Integral)
{
  // 32-bit types.
  TestIntegral<uint32_t>();
  TestIntegral<int32_t>();

  // 64-bit types.
  TestIntegral<uint64_t>();
  TestIntegral<int64_t>();
}
} // namespace rw_ops_tests

View file

@ -0,0 +1,265 @@
#include "testing/testing.hpp"
#include "coding/serdes_json.hpp"
#include "coding/writer.hpp"
#include "base/string_utils.hpp"
#include "base/visitor.hpp"
#include <array>
#include <chrono>
#include <deque>
#include <limits>
#include <map>
#include <memory>
#include <unordered_set>
#include <vector>
using namespace std;
namespace
{
// Compares two smart pointers by value: they are the same when both are empty
// or when both are set and point to equal objects.
template <typename Ptr>
bool SamePtrValue(Ptr const & lhs, Ptr const & rhs)
{
  bool const lhsIsSet = static_cast<bool>(lhs);
  bool const rhsIsSet = static_cast<bool>(rhs);

  if (lhsIsSet != rhsIsSet)
    return false;
  if (!lhsIsSet)
    return true;
  return *lhs == *rhs;
}
template <typename T>
bool TestSerDes(T const & value)
{
string jsonStr;
{
using Sink = MemWriter<string>;
Sink sink(jsonStr);
coding::SerializerJson<Sink> ser(sink);
ser(value);
}
T deserializedValue;
try
{
coding::DeserializerJson des(jsonStr);
des(deserializedValue);
}
catch (base::Json::Exception const & exception)
{
LOG(LWARNING, ("Exception while parsing json string, reason:", exception.what(), "json:", jsonStr));
return false;
}
return deserializedValue == value;
}
// Enumeration used to test (de)serialization of enum values.
enum class TestEnum
{
  Value0,  // == 0
  Value1,
  Value2,
  Count  // Follows the last real value.
};
struct ValueTypes
{
DECLARE_VISITOR(visitor(m_boolValue, "boolValue"), visitor(m_uint8Value, "uint8Value"),
visitor(m_uint32Value, "uint32Value"), visitor(m_uint64Value, "uint64Value"),
visitor(m_int8Value, "int8Value"), visitor(m_int32Value, "int32Value"),
visitor(m_int64Value, "int64Value"), visitor(m_doubleValue, "doubleValue"),
visitor(m_stringValue, "stringValue"), visitor(m_enumValue, "enumValue"),
visitor(m_timePointValue, "timePointValue"))
ValueTypes() = default;
ValueTypes(uint32_t testCounter)
: m_boolValue(static_cast<bool>(testCounter % 2))
, m_uint8Value(numeric_limits<uint8_t>::max() - static_cast<uint8_t>(testCounter))
, m_uint32Value(numeric_limits<uint32_t>::max() - testCounter)
, m_uint64Value(numeric_limits<uint64_t>::max() - testCounter)
, m_int8Value(numeric_limits<int8_t>::min() + static_cast<int8_t>(testCounter))
, m_int32Value(numeric_limits<int32_t>::min() + static_cast<int32_t>(testCounter))
, m_int64Value(numeric_limits<int64_t>::min() + static_cast<int64_t>(testCounter))
, m_doubleValue(numeric_limits<double>::max() - testCounter)
, m_stringValue(strings::to_string(testCounter))
, m_enumValue(static_cast<TestEnum>(testCounter % static_cast<uint32_t>(TestEnum::Count)))
, m_timePointValue(chrono::system_clock::now())
{}
bool operator==(ValueTypes const & rhs) const
{
return m_boolValue == rhs.m_boolValue && m_uint8Value == rhs.m_uint8Value && m_uint32Value == rhs.m_uint32Value &&
m_uint64Value == rhs.m_uint64Value && m_int8Value == rhs.m_int8Value && m_int32Value == rhs.m_int32Value &&
m_int64Value == rhs.m_int64Value && m_doubleValue == rhs.m_doubleValue &&
m_stringValue == rhs.m_stringValue && m_enumValue == rhs.m_enumValue &&
m_timePointValue == rhs.m_timePointValue;
}
bool m_boolValue;
uint8_t m_uint8Value;
uint32_t m_uint32Value;
uint64_t m_uint64Value;
int8_t m_int8Value;
int32_t m_int32Value;
int64_t m_int64Value;
double m_doubleValue;
string m_stringValue;
TestEnum m_enumValue;
chrono::system_clock::time_point m_timePointValue;
};
// Bundle of compound values (point, lat/lon and pair) that are visited under
// the JSON keys listed in DECLARE_VISITOR.
struct ObjectTypes
{
  DECLARE_VISITOR(visitor(m_pointValue, "pointValue"), visitor(m_latLonValue, "latLonValue"),
                  visitor(m_pairValue, "pairValue"))

  ObjectTypes() = default;

  // Every field is derived from |testCounter|.
  ObjectTypes(uint32_t testCounter)
    : m_pointValue(testCounter, testCounter)
    , m_latLonValue(testCounter, testCounter)
    , m_pairValue(testCounter, strings::to_string(testCounter))
  {}

  // Field-by-field equality.
  bool operator==(ObjectTypes const & rhs) const
  {
    if (!(m_pointValue == rhs.m_pointValue))
      return false;
    if (!(m_latLonValue == rhs.m_latLonValue))
      return false;
    return m_pairValue == rhs.m_pairValue;
  }

  m2::PointD m_pointValue;
  ms::LatLon m_latLonValue;
  pair<uint32_t, string> m_pairValue;
};
struct PointerTypes
{
DECLARE_VISITOR(visitor(m_uniquePtrValue, "uniquePtrValue"), visitor(m_sharedPtrValue, "sharedPtrValue"))
PointerTypes() = default;
PointerTypes(uint32_t testCounter)
{
m_uniquePtrValue = make_unique<ValueTypes>(testCounter);
m_sharedPtrValue = make_shared<ValueTypes>(testCounter);
}
bool operator==(PointerTypes const & rhs) const
{
return SamePtrValue(m_uniquePtrValue, rhs.m_uniquePtrValue) && SamePtrValue(m_sharedPtrValue, rhs.m_sharedPtrValue);
}
unique_ptr<ValueTypes> m_uniquePtrValue;
shared_ptr<ValueTypes> m_sharedPtrValue;
};
// Bundle of standard containers that are visited under the JSON keys listed
// in DECLARE_VISITOR.
struct ArrayTypes
{
  DECLARE_VISITOR(visitor(m_arrayValue, "arrayValue"), visitor(m_dequeValue, "dequeValue"),
                  visitor(m_vectorValue, "vectorValue"), visitor(m_mapValue, "mapValue"),
                  visitor(m_unorderedSetValue, "unorderedSetValue"))

  ArrayTypes() = default;

  // Every container is filled with values derived from |testCounter|.
  ArrayTypes(uint32_t testCounter)
    : m_arrayValue({{testCounter, testCounter + 1, testCounter + 2}})
    , m_dequeValue({testCounter + 2, testCounter + 1, testCounter})
    , m_vectorValue({testCounter, testCounter + 2, testCounter + 1})
    , m_mapValue({{testCounter, testCounter}, {testCounter + 1, testCounter + 1}})
    , m_unorderedSetValue({testCounter + 2, testCounter, testCounter + 1})
  {}

  // Container-by-container equality.
  bool operator==(ArrayTypes const & rhs) const
  {
    if (!(m_arrayValue == rhs.m_arrayValue))
      return false;
    if (!(m_dequeValue == rhs.m_dequeValue))
      return false;
    if (!(m_vectorValue == rhs.m_vectorValue))
      return false;
    if (!(m_mapValue == rhs.m_mapValue))
      return false;
    return m_unorderedSetValue == rhs.m_unorderedSetValue;
  }

  array<uint32_t, 3> m_arrayValue;
  deque<uint32_t> m_dequeValue;
  vector<uint32_t> m_vectorValue;
  map<uint32_t, uint32_t> m_mapValue;
  unordered_set<uint32_t> m_unorderedSetValue;
};
} // namespace
// Round-trips single structures, pairs, arrays, sets, vectors and maps
// through the JSON serializer and deserializer.
UNIT_TEST(SerdesJsonTest)
{
  uint32_t const kItemsCount = 5;

  // Every test structure on its own.
  {
    ValueTypes valueTypes(0);
    TEST(TestSerDes(valueTypes), ());

    ObjectTypes objectTypes(0);
    TEST(TestSerDes(objectTypes), ());

    PointerTypes pointersTypes(0);
    TEST(TestSerDes(pointersTypes), ());

    ArrayTypes arrayTypes(0);
    TEST(TestSerDes(arrayTypes), ());
  }

  // Pairs of various element types.
  {
    pair<string, m2::PointD> testValue = {"test", m2::PointD(1.0, 2.0)};
    TEST(TestSerDes(testValue), ());
  }
  {
    pair<m2::PointD, m2::PointD> testValue = {m2::PointD(1.0, 2.0), m2::PointD(2.0, 3.0)};
    TEST(TestSerDes(testValue), ());
  }
  {
    pair<string, pair<string, string>> testValue = {"test", {"test1", "test2"}};
    TEST(TestSerDes(testValue), ());
  }
  {
    pair<string, ValueTypes> testValue = {"test", ValueTypes(0)};
    TEST(TestSerDes(testValue), ());
  }

  // Fixed-size array of structures.
  {
    array<ObjectTypes, 2> testValue = {{ObjectTypes(0), ObjectTypes(1)}};
    TEST(TestSerDes(testValue), ());
  }

  // Unordered set with a custom hasher.
  {
    struct Hasher
    {
      size_t operator()(pair<string, string> const & item) const { return m_hasher(item.first + item.second); }
      hash<string> m_hasher;
    };

    unordered_set<pair<string, string>, Hasher> testValue = {{"ab", "ab"}, {"ef", "ef"}, {"cd", "cd"}};
    TEST(TestSerDes(testValue), ());
  }

  // Nested vectors.
  {
    vector<vector<uint32_t>> testValue;
    for (uint32_t i = 0; i < kItemsCount; ++i)
      testValue.push_back(vector<uint32_t>(3, i));
    TEST(TestSerDes(testValue), ());
  }

  // Vectors and maps of structures.
  {
    vector<ValueTypes> valuesVector;
    for (uint32_t i = 0; i < kItemsCount; ++i)
      valuesVector.emplace_back(i);
    TEST(TestSerDes(valuesVector), ());
  }
  {
    map<uint32_t, ValueTypes> valuesMap;
    for (uint32_t i = 0; i < kItemsCount; ++i)
      valuesMap.emplace(i, ValueTypes(i));
    TEST(TestSerDes(valuesMap), ());
  }
  {
    vector<ObjectTypes> objectsVector;
    for (uint32_t i = 0; i < kItemsCount; ++i)
      objectsVector.emplace_back(i);
    TEST(TestSerDes(objectsVector), ());
  }
  {
    map<uint32_t, ObjectTypes> objectsMap;
    for (uint32_t i = 0; i < kItemsCount; ++i)
      objectsMap.emplace(i, ObjectTypes(i));
    TEST(TestSerDes(objectsMap), ());
  }
}

View file

@ -0,0 +1,49 @@
#include "testing/testing.hpp"
#include "coding/sha1.hpp"
namespace sha1_test
{
using namespace coding;
// Checks SHA1::CalculateForString() against precomputed digests.
// |bytes| holds the input strings and |encoded| holds the expected 20-byte
// digest for the input with the same index.
UNIT_TEST(SHA1_Smoke)
{
// Inputs: growing prefixes of "Hello, ", then two longer sentences.
char const * bytes[] = {
"H",
"He",
"Hel",
"Hell",
"Hello",
"Hello,",
"Hello, ",
"Hello, World!",
"Organic Maps is the ultimate companion app for travellers, tourists, hikers, and cyclists!",
};
// Expected digests, in the same order as |bytes|.
SHA1::Hash encoded[] = {
{0x7C, 0xF1, 0x84, 0xF4, 0xC6, 0x7A, 0xD5, 0x82, 0x83, 0xEC,
0xB1, 0x93, 0x49, 0x72, 0x0B, 0x0C, 0xAE, 0x75, 0x68, 0x29},
{0x53, 0xA4, 0x17, 0x79, 0x6C, 0x77, 0x78, 0x51, 0x00, 0x3B,
0x3F, 0x24, 0x31, 0xE8, 0xEE, 0xF5, 0x62, 0x5E, 0xC1, 0x5B},
{0xDB, 0xC2, 0xD1, 0xFE, 0xD0, 0xDC, 0x37, 0xA7, 0x0A, 0xEA,
0x0F, 0x37, 0x69, 0x58, 0xC8, 0x02, 0xED, 0xDC, 0x05, 0x59},
{0xED, 0x10, 0xFE, 0x11, 0x3D, 0xE1, 0xC0, 0xBD, 0xAA, 0xAA,
0xF0, 0x9B, 0x88, 0xCD, 0x34, 0x1E, 0xA0, 0xF4, 0x44, 0x28},
{0xF7, 0xFF, 0x9E, 0x8B, 0x7B, 0xB2, 0xE0, 0x9B, 0x70, 0x93,
0x5A, 0x5D, 0x78, 0x5E, 0x0C, 0xC5, 0xD9, 0xD0, 0xAB, 0xF0},
{0x65, 0x61, 0x1E, 0x95, 0x20, 0xE7, 0x68, 0x14, 0x5D, 0xAD,
0xAA, 0x1D, 0x10, 0x7F, 0xDD, 0x52, 0x07, 0xE6, 0x30, 0x57},
{0xF5, 0x2A, 0xB5, 0x7F, 0xA5, 0x1D, 0xFA, 0x71, 0x45, 0x05,
0x29, 0x44, 0x44, 0x46, 0x3A, 0xE5, 0xA0, 0x09, 0xAE, 0x34},
{0x0A, 0x0A, 0x9F, 0x2A, 0x67, 0x72, 0x94, 0x25, 0x57, 0xAB,
0x53, 0x55, 0xD7, 0x6A, 0xF4, 0x42, 0xF8, 0xF6, 0x5E, 0x01},
{0x48, 0xF5, 0x4D, 0x3D, 0x08, 0xD5, 0xC0, 0x57, 0x6B, 0x3A,
0xC5, 0x3E, 0xEF, 0x22, 0x4A, 0xB8, 0x46, 0x7B, 0xA2, 0xFC},
};
// Compile-time guard: every input must have an expected digest.
static_assert(std::size(bytes) == std::size(encoded));
for (size_t i = 0; i < std::size(bytes); ++i)
TEST_EQUAL(SHA1::CalculateForString(bytes[i]), encoded[i], ());
}
} // namespace sha1_test

View file

@ -0,0 +1,89 @@
#include "testing/testing.hpp"
#include "coding/file_writer.hpp"
#include "coding/mmap_reader.hpp"
#include "coding/simple_dense_coding.hpp"
#include "coding/succinct_mapper.hpp"
#include "base/logging.hpp"
#include "base/scope_guard.hpp"
#include <limits>
#include <random>
#include <string>
#include <vector>
namespace simple_dense_coding_test
{
using namespace coding;
using namespace std;
namespace
{
// Checks that |coding| has the same size as |data| and returns the same value
// as |data| at every index.
void TestSDC(vector<uint8_t> const & data, SimpleDenseCoding const & coding)
{
  size_t const expectedSize = data.size();
  TEST_EQUAL(expectedSize, coding.Size(), ());

  for (size_t idx = 0; idx < expectedSize; ++idx)
    TEST_EQUAL(data[idx], coding.Get(idx), ());
}
}  // namespace
// Encodes the sequence 0, 1, ..., 254, checks it in memory, freezes it into a
// file and checks it again after mapping the file.
UNIT_TEST(SimpleDenseCoding_Smoke)
{
  size_t const kSize = numeric_limits<uint8_t>::max();
  vector<uint8_t> data(kSize);
  for (size_t idx = 0; idx < data.size(); ++idx)
    data[idx] = static_cast<uint8_t>(idx);

  string const kTestFile = "test.tmp";
  // Removes the file on every way out of the test.
  SCOPE_GUARD(cleanup, bind(&FileWriter::DeleteFileX, kTestFile));

  // Build and store.
  {
    SimpleDenseCoding encoded(data);
    TestSDC(data, encoded);

    FileWriter writer(kTestFile);
    Freeze(encoded, writer, "SimpleDenseCoding");
  }

  // Map and verify.
  {
    MmapReader reader(kTestFile);
    SimpleDenseCoding mapped;
    Map(mapped, reader.Data(), "SimpleDenseCoding");
    TestSDC(data, mapped);
  }
}
// Encodes 2^20 normally distributed values (mean maxValue / 2, deviation 2,
// clipped to [0, maxValue]) and expects the frozen coding to be more than 1.8
// times smaller than the raw data.
UNIT_TEST(SimpleDenseCoding_Ratio)
{
  for (uint8_t const maxValue : {16, 32, 64})
  {
    size_t constexpr kSize = 1 << 20;

    normal_distribution<> randDist(maxValue / 2, 2);
    random_device randDevice;
    mt19937 randEngine(randDevice());

    vector<uint8_t> data(kSize);
    for (auto & value : data)
    {
      double sample = round(randDist(randEngine));
      // Clip the sample to the [0, maxValue] range.
      if (sample < 0)
        sample = 0;
      else if (sample > maxValue)
        sample = maxValue;
      value = static_cast<uint8_t>(sample);
    }

    SimpleDenseCoding encoded(data);
    TestSDC(data, encoded);

    vector<uint8_t> frozen;
    MemWriter writer(frozen);
    Freeze(encoded, writer, "");

    auto const ratio = data.size() / double(frozen.size());
    LOG(LINFO, (maxValue, ratio));
    TEST_GREATER(ratio, 1.8, ());
  }
}
} // namespace simple_dense_coding_test

View file

@ -0,0 +1,26 @@
#include "testing/testing.hpp"
#include "coding/sparse_vector.hpp"
// Builds a sparse vector where zeros of the source array stand for empty
// slots and checks the presence flag and the value of every slot.
UNIT_TEST(SparseVector_Smoke)
{
  uint32_t const arr[] = {0, 0, 5, 0, 7, 1000, 0, 0, 1, 0};
  uint64_t const count = std::size(arr);

  coding::SparseVectorBuilder<uint32_t> builder(count);
  for (uint32_t v : arr)
  {
    if (v != 0)
      builder.PushValue(v);
    else
      builder.PushEmpty();
  }

  auto vec = builder.Build();
  TEST_EQUAL(vec.GetSize(), count, ());

  for (size_t i = 0; i < count; ++i)
  {
    bool const hasValue = arr[i] != 0;
    TEST_EQUAL(vec.Has(i), hasValue, ());
    if (hasValue)
      TEST_EQUAL(vec.Get(i), arr[i], ());
  }
}

View file

@ -0,0 +1,263 @@
#include "testing/testing.hpp"
#include "coding/string_utf8_multilang.hpp"
#include "base/control_flow.hpp"
#include <utf8.h>
#include <cstddef>
#include <string>
#include <vector>
using namespace std;
namespace
{
// A language code together with a UTF-8 string in that language.
struct lang_string
{
  char const * m_lang;
  char const * m_str;
};

// Shared test data; the last two strings are multi-byte UTF-8 sequences.
lang_string gArr[] = {
    {"default", "default"},
    {"en", "abcd"},
    {"ru", "\xD0\xA0\xD0\xB0\xD1\x88\xD0\xBA\xD0\xB0"},
    {"be", "\xE2\x82\xAC\xF0\xA4\xAD\xA2"},
};

// Adds |count| strings from |arr| into a fresh StringUtf8Multilang and checks
// that each of them can be read back, both right after it was added and
// after all of them were added. An unknown language must not be found.
void TestMultilangString(lang_string const * arr, size_t count)
{
  StringUtf8Multilang s;

  for (size_t idx = 0; idx < count; ++idx)
  {
    lang_string const & item = arr[idx];

    string src(item.m_str);
    TEST(utf8::is_valid(src.begin(), src.end()), ());
    s.AddString(item.m_lang, src);

    string_view comp;
    TEST(s.GetString(item.m_lang, comp), ());
    TEST_EQUAL(src, comp, ());
  }

  for (size_t idx = 0; idx < count; ++idx)
  {
    lang_string const & item = arr[idx];

    string_view comp;
    TEST(s.GetString(item.m_lang, comp), ());
    TEST_EQUAL(item.m_str, comp, ());
  }

  string_view test;
  TEST(!s.GetString("xxx", test), ());
}
}  // namespace
// Runs the add/get round trip over the shared gArr data set.
// TestMultilangString() builds its own StringUtf8Multilang, so no local
// instance is needed here.
UNIT_TEST(MultilangString_Smoke)
{
  TestMultilangString(gArr, ARRAY_SIZE(gArr));
}
// Checks that ForEach() visits the strings in the order they were added and
// that returning base::ControlFlow::Break stops the iteration.
UNIT_TEST(MultilangString_ForEach)
{
  StringUtf8Multilang s;
  for (auto const & item : gArr)
    s.AddString(item.m_lang, item.m_str);

  // Full traversal with a callback that returns nothing.
  {
    size_t visited = 0;
    s.ForEach([&visited](char lang, string_view utf8s)
    {
      lang_string const & item = gArr[visited];
      TEST_EQUAL(lang, StringUtf8Multilang::GetLangIndex(item.m_lang), ());
      TEST_EQUAL(utf8s, item.m_str, ());
      ++visited;
    });
    TEST_EQUAL(visited, ARRAY_SIZE(gArr), ());
  }

  // Traversal that is interrupted after the third string.
  {
    size_t visited = 0;
    vector<string> const expected = {"default", "en", "ru"};
    vector<string> actual;
    s.ForEach([&visited, &actual](char /* lang */, string_view)
    {
      actual.push_back(gArr[visited].m_lang);
      ++visited;
      return visited == 3 ? base::ControlFlow::Break : base::ControlFlow::Continue;
    });
    TEST_EQUAL(visited, 3, ());
    TEST_EQUAL(actual, expected, ());
  }
}
// Adding a string for a language that is already present must replace the
// old string (with a longer or a shorter one) and keep other languages intact.
UNIT_TEST(MultilangString_Unique)
{
  StringUtf8Multilang s;
  string_view got;

  // Initial strings for languages 0 and 1.
  s.AddString(0, "xxx");
  TEST(s.GetString(0, got), ());
  TEST_EQUAL(got, "xxx", ());

  s.AddString(1, "yyy");
  TEST(s.GetString(1, got), ());
  TEST_EQUAL(got, "yyy", ());

  // Replace language 0 with a longer string.
  s.AddString(0, "xxxxxx");
  TEST(s.GetString(0, got), ());
  TEST_EQUAL(got, "xxxxxx", ());
  TEST(s.GetString(1, got), ());
  TEST_EQUAL(got, "yyy", ());

  // Replace language 0 with a shorter string.
  s.AddString(0, "x");
  TEST(s.GetString(0, got), ());
  TEST_EQUAL(got, "x", ());
  TEST(s.GetString(1, got), ());
  TEST_EQUAL(got, "yyy", ());
}
// Checks the native name of the "be" language, the size limit of the
// supported languages list and the code stored under the "int_name" index.
UNIT_TEST(MultilangString_LangNames)
{
  // The string contents are compared here, not the pointers.
  auto const beIndex = StringUtf8Multilang::GetLangIndex("be");
  TEST_EQUAL(string("Беларуская"), StringUtf8Multilang::GetLangNameByCode(beIndex), ());

  auto const & langs = StringUtf8Multilang::GetSupportedLanguages();
  // The constexpr limit is copied into a size_t value because the testing
  // macros take their arguments by reference.
  TEST_LESS_OR_EQUAL(langs.size(), static_cast<size_t>(StringUtf8Multilang::kMaxSupportedLanguages), ());

  auto const international = StringUtf8Multilang::GetLangIndex("int_name");
  TEST_EQUAL(langs[international].m_code, string("int_name"), ());
}
// HasString() must report exactly the language codes that were added.
UNIT_TEST(MultilangString_HasString)
{
  StringUtf8Multilang s;
  s.AddString(0, "xxx");
  s.AddString(18, "yyy");
  s.AddString(63, "zzz");

  // Codes that were added.
  TEST(s.HasString(0), ());
  TEST(s.HasString(18), ());
  TEST(s.HasString(63), ());

  // Codes that were never added.
  TEST(!s.HasString(1), ());
  TEST(!s.HasString(32), ());
}
/*
UNIT_TEST(MultilangString_ForEachLanguage)
{
using Translations = vector<pair<string, string>>;
StringUtf8Multilang s;
Translations const scotlandTranslations = {
{"be", "Шатландыя"}, {"cs", "Skotsko"}, {"cy", "Yr Alban"}, {"da", "Skotland"},
{"de", "Schottland"}, {"eo", "Skotlando"}, {"es", "Escocia"}, {"eu", "Eskozia"},
{"fi", "Skotlanti"}, {"fr", "Écosse"}, {"ga", "Albain"}, {"gd", "Alba"},
{"hr", "Škotska"}, {"ia", "Scotia"}, {"io", "Skotia"}, {"ja", "スコットランド"},
{"ku", "Skotland"}, {"lfn", "Scotland"}, {"nl", "Schotland"}, {"pl", "Szkocja"},
{"ru", "Шотландия"}, {"sco", "Scotland"}, {"sk", "Škótsko"}, {"sr", "Шкотска"},
{"sv", "Skottland"}, {"tok", "Sukosi"}, {"tzl", "Escot"}, {"uk", "Шотландія"},
{"vo", "Skotän"}, {"zh", "苏格兰"}};
Translations const usedTranslations = {
{"be", "Шатландыя"}, {"cs", "Skotsko"}, {"eu", "Eskozia"}, {"zh", "苏格兰"}};
for (auto const & langAndTranslation : scotlandTranslations)
{
s.AddString(langAndTranslation.first, langAndTranslation.second);
}
set<string> testAccumulator;
vector<string> const preferredLanguages = {"cs", "eu", "be", "zh"};
vector<string> const preferredTranslations = {"Skotsko", "Eskozia", "Шатландыя", "苏格兰"};
auto const fn = [&testAccumulator, &usedTranslations](int8_t code, string const & name) {
testAccumulator.insert(name);
if (usedTranslations.size() > testAccumulator.size())
return base::ControlFlow::Continue;
return base::ControlFlow::Break;
};
TEST(s.ForEachLanguage(preferredLanguages, fn), ());
TEST_EQUAL(testAccumulator.size(), preferredTranslations.size(), ());
for (string const & translation : preferredTranslations)
{
TEST(testAccumulator.find(translation) != testAccumulator.end(), ());
}
testAccumulator.clear();
vector<string> const corruptedLanguages = {"Матерный", "Детский", "BirdLanguage"};
TEST(!s.ForEachLanguage(corruptedLanguages, fn), ());
TEST_EQUAL(testAccumulator.size(), 0, ());
}
*/
// Checks that RemoveString() drops exactly the requested language codes and leaves
// every other stored string intact.
UNIT_TEST(MultilangString_RemoveString)
{
  // Fills a multilang string with |entries|, removes the codes listed in |removed|
  // and validates what is left.
  auto const checkRemoval = [](vector<pair<uint8_t, string>> const & entries, set<uint8_t> const & removed)
  {
    StringUtf8Multilang multilang;
    for (auto const & entry : entries)
      multilang.AddString(entry.first, entry.second);

    // Everything that was added must be readable before any removal.
    string_view stored;
    for (auto const & entry : entries)
    {
      TEST(multilang.HasString(entry.first), ());
      TEST(multilang.GetString(entry.first, stored), ());
      TEST_EQUAL(stored, entry.second, ());
    }

    for (auto code : removed)
      multilang.RemoveString(code);

    for (auto const & entry : entries)
    {
      if (removed.count(entry.first) != 0)
      {
        TEST(!multilang.HasString(entry.first), ());
        continue;
      }

      TEST(multilang.HasString(entry.first), ());
      TEST(multilang.GetString(entry.first, stored), ());
      TEST_EQUAL(stored, entry.second, ());
    }

    // No extra languages or other data damage.
    multilang.ForEach([&](uint8_t lang, auto const &)
    {
      auto const hasSameLang = [&lang](auto const & entry) { return entry.first == lang; };
      TEST(base::FindIf(entries, hasSameLang) != entries.end(), ());
      TEST(removed.count(lang) == 0, ());
    });
  };

  vector<pair<uint8_t, string>> entries = {{0, "aaa"}, {1, "bbb"}, {2, "ccc"}, {9, "ddd"},
                                           {17, "eee"}, {27, "fff"}, {37, "ggg"}};

  checkRemoval(entries, {0});
  checkRemoval(entries, {1});
  checkRemoval(entries, {9, 27});
  checkRemoval(entries, {37});
  // Remove every stored code.
  checkRemoval(entries, {0, 1, 2, 9, 17, 27, 37});
  // Code 39 is not among the stored entries.
  checkRemoval(entries, {39});
}
// Round-trips a multilang string through its raw buffer: GetBuffer() -> FromBuffer().
UNIT_TEST(MultilangString_Buffers)
{
  StringUtf8Multilang original;
  original.AddString(StringUtf8Multilang::kInternationalCode, "blabla");

  // Rebuild an independent object from a copy of the serialized buffer.
  StringUtf8Multilang const restored = StringUtf8Multilang::FromBuffer(std::string(original.GetBuffer()));
  TEST_EQUAL(restored.CountLangs(), 1, ());

  std::string_view value;
  TEST(restored.GetString(StringUtf8Multilang::kInternationalCode, value), ());
  TEST_EQUAL(value, "blabla", ());
}

View file

@ -0,0 +1,90 @@
#include "testing/testing.hpp"
#include "coding/succinct_mapper.hpp"
#include "coding/writer.hpp"
#include "3party/succinct/elias_fano_compressed_list.hpp"
#include <random>
#include <vector>
namespace succinct_ef_test
{
using namespace std;
// Returns |count| values drawn uniformly from [0, max(T) - 1].
// The generator is seeded from std::random_device, so every call yields different data.
template <class T>
std::vector<T> GetUniformValues(size_t count)
{
  std::random_device seedSource;
  std::mt19937 generator(seedSource());
  // The upper bound is max - 1 because succinct encodes val + 1 internally.
  std::uniform_int_distribution<T> distribution(0, std::numeric_limits<T>::max() - 1);

  std::vector<T> values;
  values.reserve(count);
  while (values.size() < count)
    values.push_back(distribution(generator));
  return values;
}
// Returns |count| values sampled from a normal distribution with the given |mean| and
// a standard deviation of 2. Each sample is rounded and clamped to [0, max(T) - 1].
// The generator is seeded from std::random_device, so every call yields different data.
template <class T>
std::vector<T> GetNormalValues(size_t count, T mean)
{
  // The upper bound is max - 1 because succinct encodes val + 1 internally.
  T constexpr kUpperBound = std::numeric_limits<T>::max() - 1;

  std::normal_distribution<> distribution(mean, 2);
  std::random_device seedSource;
  std::mt19937 generator(seedSource());

  std::vector<T> values;
  values.reserve(count);
  for (size_t i = 0; i < count; ++i)
  {
    double const sample = std::round(distribution(generator));
    if (sample < 0)
      values.push_back(0);
    else if (sample > kUpperBound)
      values.push_back(kUpperBound);
    else
      values.push_back(static_cast<T>(sample));
  }
  return values;
}
// Freezes |data| as a succinct::elias_fano_compressed_list into a memory buffer and
// returns (raw size of |data| in bytes) / (size of the frozen buffer in bytes).
// A result above 1 means the frozen form is smaller than the raw values.
template <class T>
double GetCompressionRatio(vector<T> const & data)
{
  succinct::elias_fano_compressed_list compressed(data);

  vector<uint8_t> frozen;
  MemWriter sink(frozen);
  coding::Freeze(compressed, sink, "");

  double const rawBytes = data.size() * sizeof(T);
  return rawBytes / double(frozen.size());
}
// Compares the compression ratio of the Elias-Fano list on uniformly distributed data
// (expected below 1) and on normally distributed small values (expected above 1).
UNIT_TEST(SuccinctEFList_Ratio)
{
  size_t constexpr kCount = 1 << 20;

  {
    // No need to use EFList for generic data.
    double const uniformRatio16 = GetCompressionRatio(GetUniformValues<uint16_t>(kCount));
    TEST_LESS(uniformRatio16, 1, ());
    LOG(LINFO, ("Uniform ratio 2:", uniformRatio16));

    double const uniformRatio32 = GetCompressionRatio(GetUniformValues<uint32_t>(kCount));
    TEST_LESS(uniformRatio32, 1, ());
    LOG(LINFO, ("Uniform ratio 4:", uniformRatio32));
  }

  {
    // EF is good for some kind of normal distribution of small values.
    double const normalRatio16 = GetCompressionRatio(GetNormalValues(kCount, uint16_t(128)));
    TEST_GREATER(normalRatio16, 1, ());
    LOG(LINFO, ("Normal ratio 2:", normalRatio16));

    double const normalRatio32 = GetCompressionRatio(GetNormalValues(kCount, uint32_t(1024)));
    TEST_GREATER(normalRatio32, 1, ());
    LOG(LINFO, ("Normal ratio 4:", normalRatio32));
  }
}
} // namespace succinct_ef_test

View file

@ -0,0 +1,55 @@
#include "testing/testing.hpp"
#include "coding/succinct_mapper.hpp"
#include "coding/writer.hpp"
#include <cstdint>
#include <vector>
#include "3party/succinct/mapper.hpp"
using namespace coding;
// ReverseMap() on a uint64_t must report 8 and yield the byte-reversed value both in
// the output variable and in the source buffer itself.
UNIT_TEST(ReverseMapper_Smoke)
{
  uint64_t source = 0x0123456789abcdef;
  uint64_t mapped = 0x0;

  auto * const sourceBytes = reinterpret_cast<uint8_t *>(&source);
  TEST_EQUAL(8, ReverseMap(mapped, sourceBytes, "uint64_t"), ());

  // Test that reversed uint64_t was read.
  TEST_EQUAL(0xefcdab8967452301, mapped, ());

  // Test that underlying buffer was modified.
  TEST_EQUAL(0xefcdab8967452301, source, ());
}
// Freeze() of a uint64_t must emit 8 bytes that Map() reads back as the same value.
UNIT_TEST(Freeze_Smoke)
{
  std::vector<uint8_t> buffer;
  {
    // The writer lives only inside this scope; the buffer is inspected afterwards.
    MemWriter<decltype(buffer)> writer(buffer);
    uint64_t const payload = 0x0123456789abcdef;
    Freeze(payload, writer, "uint64_t");
  }
  TEST_EQUAL(8, buffer.size(), ());

  uint64_t restored = 0x0;
  TEST_EQUAL(8, Map(restored, reinterpret_cast<uint8_t const *>(buffer.data()), "uint64_t"), ());
  TEST_EQUAL(0x0123456789abcdef, restored, ());
}
// ReverseFreeze() of a uint64_t must emit 8 bytes that a plain Map() reads back as the
// byte-reversed value.
UNIT_TEST(ReverseFreeze_Smoke)
{
  std::vector<uint8_t> buffer;
  {
    // The writer lives only inside this scope; the buffer is inspected afterwards.
    MemWriter<decltype(buffer)> writer(buffer);
    uint64_t const payload = 0x0123456789abcdef;
    ReverseFreeze(payload, writer, "uint64_t");
  }
  TEST_EQUAL(8, buffer.size(), ());

  uint64_t restored = 0x0;
  TEST_EQUAL(8, Map(restored, reinterpret_cast<uint8_t const *>(buffer.data()), "uint64_t"), ());
  TEST_EQUAL(0xefcdab8967452301, restored, ());
}

View file

@ -0,0 +1,193 @@
#include "coding/coding_tests/test_polylines.hpp"
namespace geometry_coding_tests
{
P arr1[376] = {P(25.624035299999999182, 72.26346513007850092), P(25.624273200000001083, 72.263461698303601111),
P(25.624488899999999347, 72.26341365347376211), P(25.624979400000000851, 72.263304218156179104),
P(25.626030799999998777, 72.263025101705878228), P(25.629390999999998257, 72.261676817778678128),
P(25.630162399999999678, 72.26138836631159279), P(25.631299500000000791, 72.260963603282490908),
P(25.63236829999999955, 72.26051310574631259), P(25.63325580000000059, 72.260190152533994024),
P(25.633720499999999021, 72.260019906865807116), P(25.634314799999998513, 72.259865485075735592),
P(25.634578999999998672, 72.259830215951140531), P(25.635424199999999217, 72.259772832171691448),
P(25.635776400000001018, 72.259834791404088605), P(25.638406499999998545, 72.260604806439260983),
P(25.639231599999998679, 72.260931765228107793), P(25.639867699999999928, 72.261237563690428942),
P(25.640699399999999031, 72.261850499331046649), P(25.643624299999999039, 72.264447578158552687),
P(25.644772700000000754, 72.265904403664706024), P(25.645413800000000037, 72.267106341816230497),
P(25.646751600000001758, 72.270404536824941033), P(25.64890219999999843, 72.275985791150915816),
P(25.649064599999999103, 72.276404165523842948), P(25.650549500000000336, 72.279974564589863917),
P(25.651433600000000723, 72.281545386607334081), P(25.652029899999998719, 72.282193025251160634),
P(25.652814700000000414, 72.282915237415323872), P(25.654197199999998702, 72.283799562153532747),
P(25.656540400000000801, 72.285055792411071707), P(25.658162999999998277, 72.286263412818769325),
P(25.661959599999999426, 72.289916920742129491), P(25.663380199999998865, 72.291039561736027963),
P(25.665810499999999195, 72.292780588759853799), P(25.6700361000000008, 72.29585629709197292),
P(25.670962599999999298, 72.296655718166547899), P(25.672222699999998952, 72.297961211704517837),
P(25.673103499999999855, 72.29896171301187735), P(25.674837499999998869, 72.300952077677095531),
P(25.676358000000000459, 72.302732468128681376), P(25.678018200000000348, 72.304444228347662715),
P(25.680309600000001069, 72.306619426588397914), P(25.682252600000001763, 72.308208994982337003),
P(25.685880300000000886, 72.310749482551628375), P(25.6871223999999998, 72.311619291531712861),
P(25.689502399999998516, 72.313337574126506979), P(25.689994200000001001, 72.313685586072296019),
P(25.691337099999998372, 72.314639003020189989), P(25.694014100000000411, 72.316465930359882464),
P(25.696650399999999337, 72.318133963117716689), P(25.697924300000000386, 72.31863598381848135),
P(25.699229800000001234, 72.31891418618496914), P(25.700213699999999051, 72.319045273707061483),
P(25.703616300000000194, 72.319271576784373678), P(25.707311499999999427, 72.319273484907995453),
P(25.715181600000001083, 72.318046763400587906), P(25.72608460000000008, 72.315978426880036523),
P(25.728649600000000675, 72.31539857900408208), P(25.730824299999998317, 72.315156452495600092),
P(25.732753200000001215, 72.314945427265811873), P(25.736661200000000349, 72.315042353781024076),
P(25.74480259999999987, 72.315568583243575063), P(25.747831600000001373, 72.315649864883624787),
P(25.749809599999998966, 72.315866807206518274), P(25.752535200000000515, 72.316023647210727177),
P(25.755610000000000781, 72.315910501039496694), P(25.760463999999998919, 72.315272459413776573),
P(25.762314700000001011, 72.315021747344800929), P(25.763456399999999036, 72.314812630534717641),
P(25.763716200000001066, 72.31478954377344337), P(25.771413500000001306, 72.314102668549878672),
P(25.779617200000000565, 72.313375160856324442), P(25.784148800000000534, 72.313357035273327256),
P(25.790238899999998523, 72.313577786126856495), P(25.793676300000001334, 72.313716876708198811),
P(25.796280599999999339, 72.314048100429985766), P(25.798680499999999682, 72.31463614103191162),
P(25.800190700000001698, 72.315239260045032665), P(25.803071100000000371, 72.316310615756250968),
P(25.806439499999999754, 72.316835901112042961), P(25.809219599999998707, 72.316657116642062419),
P(25.813906700000000427, 72.315918133153061831), P(25.817769800000000657, 72.31543750249576874),
P(25.819804099999998925, 72.315482531661231747), P(25.823219200000000484, 72.315995217547779816),
P(25.824360999999999677, 72.316092908788874638), P(25.825752500000000111, 72.316000750836963107),
P(25.833053499999998337, 72.315183355397863352), P(25.835087900000001326, 72.314863574077250519),
P(25.836477299999998536, 72.314986830897922232), P(25.838510800000001666, 72.315843910886087542),
P(25.84021669999999915, 72.316586137240363996), P(25.845591399999999993, 72.318366369042564656),
P(25.847287900000001315, 72.318912278071522337), P(25.852937300000000675, 72.321233538069833457),
P(25.857534099999998745, 72.324114950429262194), P(25.858493899999999144, 72.324638770105451613),
P(25.859516599999999187, 72.325101910243901671), P(25.860960299999998568, 72.325309341574609334),
P(25.864481800000000078, 72.325170990340012622), P(25.866295099999998541, 72.325066225249685203),
P(25.871619400000000155, 72.324758609934391984), P(25.873917800000000966, 72.324524655307570242),
P(25.875719000000000136, 72.324229064532204347), P(25.882352300000000866, 72.322516991669758113),
P(25.886094899999999797, 72.321551632301222412), P(25.891463999999999146, 72.320154280548763381),
P(25.892594599999998906, 72.32000410941930113), P(25.893775399999999109, 72.320041127430243932),
P(25.895055100000000436, 72.320205228136387632), P(25.901716900000000265, 72.321479884460799781),
P(25.905201399999999268, 72.322148897878847151), P(25.906758400000001075, 72.322300409542663147),
P(25.908453200000000294, 72.322276366107203671), P(25.910453700000001476, 72.322039939449879853),
P(25.912611200000000622, 72.321379323121732341), P(25.914446699999999169, 72.320507670602822259),
P(25.915890699999998503, 72.319578403757603269), P(25.916971199999998987, 72.318721085380474278),
P(25.923277999999999821, 72.312682767056259081), P(25.924315100000001166, 72.311643903530907096),
P(25.925479700000000349, 72.310661910829537646), P(25.926380200000000542, 72.31012846985993292),
P(25.927288000000000778, 72.309673827336439444), P(25.929170299999999116, 72.308742039167825055),
P(25.931695000000001272, 72.307558244187632113), P(25.935542200000000435, 72.305689970006980616),
P(25.936291600000000557, 72.305420216334297834), P(25.937011699999999337, 72.3052109385934898),
P(25.937444899999999137, 72.305171830245583919), P(25.938065999999999178, 72.305126426436075349),
P(25.939194700000001603, 72.305346959512363014), P(25.941637199999998842, 72.306187700803491225),
P(25.951531899999999098, 72.309363611414866568), P(25.958591599999998323, 72.311600021678131611),
P(25.961859900000000323, 72.312588133461261464), P(25.9623209000000017, 72.312845323461488078),
P(25.962808800000001241, 72.313126745396871797), P(25.963783500000001681, 72.313929806056449934),
P(25.964454100000001091, 72.315054565005411291), P(25.966293799999998981, 72.319575350745964215),
P(25.966609900000001687, 72.320173934482440359), P(25.966938999999999993, 72.320628647970096381),
P(25.968776200000000642, 72.322731857094510133), P(25.969766299999999859, 72.323772036806516894),
P(25.97039970000000153, 72.324406914991570261), P(25.971057800000000526, 72.324904784282267656),
P(25.972805199999999815, 72.325716763759459127), P(25.973508700000000005, 72.326106631888762877),
P(25.974174900000001287, 72.326699167072590058), P(25.974623600000001034, 72.327462886785923502),
P(25.97499170000000035, 72.32822527930542833), P(25.975826399999998984, 72.329784823533856297),
P(25.976481499999998448, 72.330935420885211329), P(25.977230399999999833, 72.332212952428704966),
P(25.978115400000000079, 72.333512265445278899), P(25.9789551000000003, 72.33474671239962106),
P(25.980276700000001, 72.336402410819303554), P(25.98169719999999927, 72.337880836033434662),
P(25.983172299999999666, 72.33911288186702393), P(25.984414600000000917, 72.340068567971513858),
P(25.985398499999998734, 72.340636603533639004), P(25.986058100000001048, 72.340908025445514795),
P(25.987230000000000274, 72.341316496490946975), P(25.988157300000001015, 72.341676869267246275),
P(25.991148400000000152, 72.342299318530393748), P(25.997876999999999015, 72.343701138883602653),
P(25.999752600000000768, 72.344154484369809666), P(26.001479700000000861, 72.344723890629211382),
P(26.003023999999999916, 72.345420432028205937), P(26.005314899999998346, 72.346859159309715892),
P(26.007066099999999409, 72.348322733682408625), P(26.008686999999998335, 72.35014618535842601),
P(26.012360000000001037, 72.354910262506038521), P(26.013286199999999582, 72.355943685106993257),
P(26.013858500000001328, 72.35652369166834319), P(26.014633599999999802, 72.357135968669368253),
P(26.015746700000001113, 72.357673410043958029), P(26.017126499999999822, 72.358212001250265644),
P(26.020520199999999988, 72.359278695677289761), P(26.021437599999998724, 72.359644892510004865),
P(26.022532699999999295, 72.360275718006846546), P(26.028545999999998628, 72.365263533617877556),
P(26.029226600000001213, 72.365797602942478761), P(26.030111600000001459, 72.366317546512846093),
P(26.032004199999999372, 72.367306080501194288), P(26.033209299999999331, 72.367834246590078351),
P(26.034265699999998844, 72.368067397148493569), P(26.035592099999998794, 72.368224167962054594),
P(26.03677019999999942, 72.368129074294643033), P(26.043432299999999202, 72.366408627750374194),
P(26.045431499999999403, 72.365842856777021552), P(26.048415399999999664, 72.36504242213915461),
P(26.052753299999999115, 72.363920454888528866), P(26.05556269999999941, 72.363008918012667436),
P(26.060303699999998628, 72.360393712052541559), P(26.065962500000001256, 72.35698705139280662),
P(26.067612400000001571, 72.356026924714299753), P(26.069255399999999412, 72.355021374242639354),
P(26.070335599999999943, 72.354163985856629893), P(26.071483900000000489, 72.353231772141796796),
P(26.073087300000000965, 72.351530224288538307), P(26.07495580000000146, 72.349052146600300262),
P(26.077375199999998756, 72.345412414793742073), P(26.079008800000000434, 72.34322240936705839),
P(26.080636800000000619, 72.341554327036718064), P(26.081818800000000635, 72.340620379333103074),
P(26.083176200000000478, 72.339615440891947173), P(26.085581000000001239, 72.338285853103528211),
P(26.092078799999999461, 72.335142167729841844), P(26.099516500000000008, 72.332061609286498083),
P(26.102282500000001164, 72.330882175026999903), P(26.105014700000001682, 72.329521843521945357),
P(26.108211900000000583, 72.327720133658942814), P(26.116759299999998234, 72.322424061632020198),
P(26.118289900000000614, 72.321345929920937579), P(26.124188000000000187, 72.316306990481081129),
P(26.126093300000000852, 72.314456217615472156), P(26.13131840000000139, 72.308768748722727082),
P(26.133807300000000851, 72.305896196846916268), P(26.135103199999999646, 72.304208818196542552),
P(26.13615610000000089, 72.3027141546473473), P(26.136958199999998698, 72.301545345164157652),
P(26.137658200000000619, 72.300474224549915903), P(26.140487000000000251, 72.29551524417688313),
P(26.146685800000000199, 72.285760107870132174), P(26.151274499999999534, 72.277504651282583836),
P(26.151979099999998368, 72.276113553331668982), P(26.152562700000000717, 72.274582520714972134),
P(26.152978600000000853, 72.272986691312326002), P(26.154697899999998612, 72.264608683472175699),
P(26.155105599999998844, 72.263003939235275652), P(26.155811400000001044, 72.261258344309723611),
P(26.156706599999999696, 72.259655777039213831), P(26.158511799999999425, 72.257073180827120495),
P(26.163497199999998344, 72.251147710512896083), P(26.164152500000000146, 72.250452144382251163),
P(26.165397099999999853, 72.249370018656591697), P(26.171159400000000517, 72.245101348184562084),
P(26.171824600000000771, 72.244502288299599968), P(26.172791700000001214, 72.243464858038208831),
P(26.173422299999998586, 72.24251111483852128), P(26.174280599999999453, 72.240982180618559028),
P(26.174924399999998315, 72.239409446329290176), P(26.175138900000000319, 72.238550480576279256),
P(26.177894599999998348, 72.222417606854094174), P(26.178249600000000896, 72.220799387733251251),
P(26.178700899999999052, 72.219414415122045625), P(26.179689899999999625, 72.217234222262234766),
P(26.182073200000001378, 72.213506738076645775), P(26.18310470000000123, 72.211533626956168064),
P(26.183614800000000855, 72.210338776927230242), P(26.18428000000000111, 72.208417574177602205),
P(26.185804499999999706, 72.203266316303412964), P(26.186153000000000901, 72.202346286216979365),
P(26.186549599999999316, 72.201465316811109574), P(26.187059699999998941, 72.200685882789031211),
P(26.187643699999998859, 72.200064170625580573), P(26.188815999999999207, 72.199110470754774838),
P(26.189986799999999789, 72.198491439723213148), P(26.190943999999998226, 72.198205925482497491),
P(26.192045499999998981, 72.198064597333782899), P(26.201502200000000187, 72.19749033573828001),
P(26.204289599999999183, 72.197194731015855496), P(26.212046699999998367, 72.196023752898682346),
P(26.217400099999998986, 72.195033541852339454), P(26.220660899999998605, 72.194099530393685882),
P(26.223864100000000121, 72.193042117073559893), P(26.227025699999998665, 72.192404096537160285),
P(26.229406099999998503, 72.192154413131575552), P(26.23379059999999896, 72.191934250652863625),
P(26.241092200000000645, 72.191652763688111349), P(26.247795599999999894, 72.191305763109099303),
P(26.259740499999999486, 72.190710990755292187), P(26.262441899999998896, 72.190662426481935654),
P(26.26396259999999927, 72.190803739092231694), P(26.265582200000000768, 72.19108065172507338),
P(26.271514700000000886, 72.192273445913514252), P(26.275603900000000124, 72.192994312937273094),
P(26.278289999999998372, 72.193506828374651718), P(26.280647800000000558, 72.193799369593079973),
P(26.284991699999999071, 72.194193426147350579), P(26.295021899999998283, 72.194996021158502231),
P(26.296629599999999272, 72.195353135208762296), P(26.298219400000000689, 72.195936520796209379),
P(26.299353599999999886, 72.196573622487093758), P(26.300700500000001369, 72.19746290844136638),
P(26.301440499999998224, 72.198127833072547332), P(26.302059899999999715, 72.198747051231549676),
P(26.302597999999999701, 72.199118470577644757), P(26.30326700000000173, 72.200164931796578571),
P(26.304018299999999186, 72.201524555689601925), P(26.305375600000001413, 72.20513574950004454),
P(26.306215500000000418, 72.206942181028665573), P(26.307179600000001329, 72.208595118825385839),
P(26.307805599999998236, 72.209443034325843769), P(26.308593200000000678, 72.210334966852684602),
P(26.309511400000001657, 72.211171854914510959), P(26.310345000000001647, 72.211829485157878139),
P(26.313103999999999161, 72.213550746524816759), P(26.313808999999999116, 72.214105903186023738),
P(26.315858999999999668, 72.21616368063173752), P(26.316473599999998356, 72.216713905276705532),
P(26.317261800000000704, 72.217105619191144683), P(26.318279199999999207, 72.217451609641841515),
P(26.31951039999999864, 72.217778930438797147), P(26.319995200000001034, 72.217883719155963718),
P(26.322028199999998321, 72.21814340535271981), P(26.323134799999998279, 72.218219615725388394),
P(26.324022500000001656, 72.218280774611798734), P(26.32581220000000144, 72.218525220186265301),
P(26.327261700000001099, 72.218861882068196678), P(26.330273800000000506, 72.219715642811124212),
P(26.337171999999998917, 72.221928497785057743), P(26.339137900000000769, 72.222394361231621929),
P(26.341438799999998821, 72.222689314479467271), P(26.343669200000000785, 72.222811640430336411),
P(26.346788899999999956, 72.222677310542948703), P(26.356923500000000615, 72.222042438730937874),
P(26.359536099999999692, 72.2221015051835451), P(26.36183730000000125, 72.222299854521224916),
P(26.366428899999998947, 72.222842507761527031), P(26.374883000000000521, 72.223912965077033732),
P(26.380090800000001394, 72.224542709845593436), P(26.39073850000000121, 72.225869670908153353),
P(26.393878699999998361, 72.226187124115313054), P(26.400813700000000495, 72.226887965488728582),
P(26.405969100000000083, 72.227408932782296347), P(26.434136200000001082, 72.23031015029567925),
P(26.437651200000001239, 72.230672215773722655), P(26.439650799999999009, 72.230860300030158783),
P(26.442400500000001529, 72.230918230849241013), P(26.444426599999999894, 72.230815518016711962),
P(26.454957100000001446, 72.229639190945519545), P(26.455386699999998257, 72.229609273288744475),
P(26.470600499999999755, 72.227804710557407475), P(26.485397899999998828, 72.226080035891357056),
P(26.487313600000000235, 72.226084418502168205), P(26.488673999999999609, 72.226209799401686951),
P(26.489974300000000085, 72.226456941463752059), P(26.493316499999998825, 72.227405883949458598),
P(26.497907399999999001, 72.228727947008763977), P(26.507186099999998419, 72.231355762593423719),
P(26.521764000000001005, 72.235531322949142918), P(26.522283200000000392, 72.235663963313356817),
P(26.52274799999999999, 72.235808991367022713), P(26.523495799999999178, 72.236006428221017472),
P(26.537509100000001183, 72.239985971537208798), P(26.540924100000001573, 72.240959309764491536),
P(26.544420699999999869, 72.241674408812258434), P(26.546888100000000321, 72.242183101965366632),
P(26.5518616999999999, 72.242874580127462991), P(26.562219100000000083, 72.244128903051048951),
P(26.564274399999998622, 72.244315309516480283), P(26.576127799999998302, 72.245028538203385438),
P(26.58263820000000166, 72.244424904560787581), P(26.591367999999999228, 72.243389190867901561),
P(26.598972199999998622, 72.242452221067154028), P(26.600826200000000199, 72.242522931717928714),
P(26.603627199999998254, 72.242683603364909573), P(26.606756300000000692, 72.243241096929352807),
P(26.612569100000001754, 72.244800578667096147), P(26.615042299999998932, 72.246052459623328446),
P(26.621848599999999863, 72.249011664844303482), P(26.627471299999999843, 72.250195383365820589),
P(26.641823800000000944, 72.252710806698729584), P(26.648778100000001245, 72.254338371527666141),
P(26.655288500000001051, 72.25700169234383452), P(26.660515000000000185, 72.259171735257126556),
P(26.662390800000000723, 72.25996099777080417), P(26.670629300000001649, 72.263625851730935779),
P(26.671595899999999801, 72.264267979553508781), P(26.676856199999999575, 72.267335711577246116),
P(26.677412499999999085, 72.267929636079472289), P(26.676856199999999575, 72.267335711577246116)};
} // namespace geometry_coding_tests

View file

@ -0,0 +1,9 @@
#pragma once
#include "geometry/point2d.hpp"
// Shared test data for the geometry coding tests.
namespace geometry_coding_tests
{
// Short alias for m2::PointD, used to keep the point table compact.
using P = m2::PointD;
// Fixed table of 376 sample points. Only declared here; the definition with the
// actual coordinates lives in a separate source file.
// NOTE(review): presumably a polyline, judging by the header name - confirm.
extern P arr1[376];
} // namespace geometry_coding_tests

View file

@ -0,0 +1,132 @@
#include "testing/testing.hpp"
#include "coding/reader.hpp"
#include "coding/text_storage.hpp"
#include "coding/writer.hpp"
#include <cstdint>
#include <random>
#include <string>
#include <vector>
using namespace coding;
using namespace std;
namespace
{
// Builds a string of random length in [0, 400] filled with random byte values in
// [0, 255]. All randomness comes from |engine|: the length is drawn first, then one
// value per character.
template <typename Engine>
std::string GenerateRandomString(Engine & engine)
{
  std::uniform_int_distribution<int> lengthDistribution(0 /* min */, 400 /* max */);
  std::uniform_int_distribution<int> byteDistribution(0 /* min */, 255 /* max */);

  std::string result(lengthDistribution(engine), '\0');
  for (size_t i = 0; i < result.size(); ++i)
    result[i] = static_cast<char>(byteDistribution(engine));
  return result;
}
// Appends every string from |strings|, in order, to a BlockedTextStorageWriter created
// with |blockSize| and backed by |buffer|. Callers read |buffer| only after this
// function has returned, i.e. after the storage writer has been destroyed.
void DumpStrings(vector<string> const & strings, uint64_t blockSize, vector<uint8_t> & buffer)
{
  MemWriter<vector<uint8_t>> sink(buffer);
  BlockedTextStorageWriter<decltype(sink)> storage(sink, blockSize);
  for (size_t i = 0; i < strings.size(); ++i)
    storage.Append(strings[i]);
}
// An empty storage must be readable both through the index alone and through the
// full BlockedTextStorage, reporting zero strings in either case.
UNIT_TEST(TextStorage_Smoke)
{
  vector<uint8_t> serialized;
  DumpStrings({} /* strings */, 10 /* blockSize */, serialized);

  // Index-only view.
  {
    MemReader indexReader(serialized.data(), serialized.size());
    BlockedTextStorageIndex storageIndex;
    storageIndex.Read(indexReader);
    TEST_EQUAL(storageIndex.GetNumStrings(), 0, ());
    TEST_EQUAL(storageIndex.GetNumBlockInfos(), 0, ());
  }

  // Full storage view.
  {
    MemReader storageReader(serialized.data(), serialized.size());
    BlockedTextStorage<decltype(storageReader)> storage(storageReader);
    TEST_EQUAL(storage.GetNumStrings(), 0, ());
  }
}
// Stores a handful of short strings with a block size of 10 and checks the index
// statistics and that every string is extracted unchanged.
UNIT_TEST(TextStorage_Simple)
{
  // A plain initializer list is used on purpose. The double-brace form `{{...}}` first
  // builds a temporary vector and copies it, and with exactly two literals it would
  // instead pick the string(InputIt, InputIt) constructor.
  vector<string> const strings = {"", "Hello", "Hello, World!", "Hola mundo", "Smoke test"};

  vector<uint8_t> buffer;
  DumpStrings(strings, 10 /* blockSize */, buffer);

  // Index-only view.
  {
    MemReader reader(buffer.data(), buffer.size());
    BlockedTextStorageIndex index;
    index.Read(reader);
    TEST_EQUAL(index.GetNumStrings(), strings.size(), ());
    TEST_EQUAL(index.GetNumBlockInfos(), 3, ());
  }

  // Full storage view: every string must round-trip.
  {
    MemReader reader(buffer.data(), buffer.size());
    BlockedTextStorage<decltype(reader)> ts(reader);
    TEST_EQUAL(ts.GetNumStrings(), strings.size(), ());
    for (size_t i = 0; i < ts.GetNumStrings(); ++i)
      TEST_EQUAL(ts.ExtractString(i), strings[i], ());
  }
}
// Stress test with mostly empty strings: 1000 one-character strings, each followed by
// 1000 empty ones, stored with a block size of 5 and read back in order.
UNIT_TEST(TextStorage_Empty)
{
  int const kNumGroups = 1000;
  size_t const kNumEmptyPerGroup = 1000;

  vector<string> strings;
  for (int group = 0; group < kNumGroups; ++group)
  {
    strings.push_back(string(1 /* size */, static_cast<char>(group % 256)));
    // Append a run of default-constructed (empty) strings.
    strings.resize(strings.size() + kNumEmptyPerGroup);
  }

  vector<uint8_t> serialized;
  DumpStrings(strings, 5 /* blockSize */, serialized);

  {
    MemReader reader(serialized.data(), serialized.size());
    BlockedTextStorage<decltype(reader)> storage(reader);
    TEST_EQUAL(storage.GetNumStrings(), strings.size(), ());
    for (size_t i = 0; i < storage.GetNumStrings(); ++i)
      TEST_EQUAL(storage.ExtractString(i), strings[i], ());
  }
}
// Stores 1000 pseudo-random strings (fixed seed, so the data is reproducible) with a
// block size of 100 and reads them back first in forward and then in reverse order.
UNIT_TEST(TextStorage_Random)
{
  int const kSeed = 42;
  int const kNumStrings = 1000;
  int const kBlockSize = 100;

  mt19937 engine(kSeed);
  vector<string> strings;
  while (strings.size() < static_cast<size_t>(kNumStrings))
    strings.push_back(GenerateRandomString(engine));

  vector<uint8_t> serialized;
  DumpStrings(strings, kBlockSize, serialized);

  MemReader reader(serialized.data(), serialized.size());
  BlockedTextStorage<decltype(reader)> storage(reader);
  TEST_EQUAL(storage.GetNumStrings(), strings.size(), ());

  // Forward pass.
  for (size_t i = 0; i < storage.GetNumStrings(); ++i)
    TEST_EQUAL(storage.ExtractString(i), strings[i], ());

  // Backward pass: from the last string down to the first one.
  for (size_t remaining = storage.GetNumStrings(); remaining > 0; --remaining)
    TEST_EQUAL(storage.ExtractString(remaining - 1), strings[remaining - 1], ());
}
} // namespace

View file

@ -0,0 +1,157 @@
#include "testing/testing.hpp"
#include "coding/traffic.hpp"
#include "geometry/mercator.hpp"
#include "geometry/point2d.hpp"
#include "base/logging.hpp"
#include "base/math.hpp"
#include <cstddef>
#include <cstdint>
#include <vector>
namespace traffic_test
{
using coding::TrafficGPSEncoder;
using std::vector;
// Returns the sum of mercator::DistanceOnEarth() over every pair of consecutive points
// of |path|. Paths with fewer than two points yield 0.
double CalculateLength(vector<TrafficGPSEncoder::DataPoint> const & path)
{
  double total = 0;
  for (size_t next = 1; next < path.size(); ++next)
  {
    auto const & from = path[next - 1].m_latLon;
    auto const & to = path[next].m_latLon;
    auto segmentBegin = mercator::FromLatLon(from.m_lat, from.m_lon);
    auto segmentEnd = mercator::FromLatLon(to.m_lat, to.m_lon);
    total += mercator::DistanceOnEarth(segmentBegin, segmentEnd);
  }
  return total;
}
// Round-trips |points| through TrafficGPSEncoder for every version from 0 up to
// kLatestVersion. Timestamps must match exactly; coordinates must match within kEps.
// For the latest version the path length and the encoded size are logged.
void Test(vector<TrafficGPSEncoder::DataPoint> & points)
{
  double constexpr kEps = 1e-5;

  for (uint32_t version = 0; version <= TrafficGPSEncoder::kLatestVersion; ++version)
  {
    // Encode into an in-memory buffer.
    vector<uint8_t> encoded;
    MemWriter<decltype(encoded)> writer(encoded);
    UNUSED_VALUE(TrafficGPSEncoder::SerializeDataPoints(version, writer, points));

    // Decode the buffer back.
    vector<TrafficGPSEncoder::DataPoint> decoded;
    MemReader reader(encoded.data(), encoded.size());
    ReaderSource<MemReader> source(reader);
    TrafficGPSEncoder::DeserializeDataPoints(version, source, decoded);

    TEST_EQUAL(points.size(), decoded.size(), ());
    for (size_t i = 0; i < points.size(); ++i)
    {
      auto const & expected = points[i];
      auto const & actual = decoded[i];
      TEST_EQUAL(expected.m_timestamp, actual.m_timestamp, (expected.m_timestamp, actual.m_timestamp));
      TEST(AlmostEqualAbsOrRel(expected.m_latLon.m_lat, actual.m_latLon.m_lat, kEps),
           (expected.m_latLon.m_lat, actual.m_latLon.m_lat));
      TEST(AlmostEqualAbsOrRel(expected.m_latLon.m_lon, actual.m_latLon.m_lon, kEps),
           (expected.m_latLon.m_lon, actual.m_latLon.m_lon));
    }

    // Statistics are reported for the latest version only.
    if (version != TrafficGPSEncoder::kLatestVersion)
      continue;

    LOG(LINFO, ("path length =", CalculateLength(points), "num points =", points.size(), "compressed size =",
                encoded.size()));
  }
}
// Minimal round-trip: two points with different coordinates and different traffic values.
UNIT_TEST(Traffic_Serialization_Smoke)
{
  vector<TrafficGPSEncoder::DataPoint> points = {
      {0, ms::LatLon(0.0, 1.0), 1},
      {0, ms::LatLon(0.0, 2.0), 2},
  };

  Test(points);
}
// Round-trip of an empty collection of points.
UNIT_TEST(Traffic_Serialization_EmptyPath)
{
  vector<TrafficGPSEncoder::DataPoint> noPoints;

  Test(noPoints);
}
// Round-trip of a two-point segment whose end points differ by 1e-3 in one coordinate.
UNIT_TEST(Traffic_Serialization_StraightLine100m)
{
  vector<TrafficGPSEncoder::DataPoint> segment = {
      {0, ms::LatLon(0.0, 0.0), 1},
      {0, ms::LatLon(0.0, 1e-3), 2},
  };

  Test(segment);
}
// Round-trip of a two-point segment whose end points differ by 0.5 in one coordinate.
UNIT_TEST(Traffic_Serialization_StraightLine50Km)
{
  vector<TrafficGPSEncoder::DataPoint> segment = {
      {0, ms::LatLon(0.0, 0.0), 1},
      {0, ms::LatLon(0.0, 0.5), 2},
  };

  Test(segment);
}
// Round-trip of a 5-point zigzag: one coordinate grows by 1e-3 per point while the
// other alternates between 0 and 1e-3.
UNIT_TEST(Traffic_Serialization_Zigzag500m)
{
  size_t const kNumPoints = 5;

  vector<TrafficGPSEncoder::DataPoint> zigzag;
  for (size_t i = 0; i < kNumPoints; ++i)
  {
    bool const isEven = i % 2 == 0;
    double const along = i * 1e-3;
    double const across = isEven ? 0 : 1e-3;
    zigzag.push_back(TrafficGPSEncoder::DataPoint(0, ms::LatLon(across, along), 3));
  }

  Test(zigzag);
}
// Round-trip of a 10-point zigzag: one coordinate grows by 1e-2 per point while the
// other alternates between 0 and 1e-2.
UNIT_TEST(Traffic_Serialization_Zigzag10Km)
{
  size_t const kNumPoints = 10;

  vector<TrafficGPSEncoder::DataPoint> zigzag;
  for (size_t i = 0; i < kNumPoints; ++i)
  {
    bool const isEven = i % 2 == 0;
    double const along = i * 1e-2;
    double const across = isEven ? 0 : 1e-2;
    zigzag.push_back(TrafficGPSEncoder::DataPoint(0, ms::LatLon(across, along), 0));
  }

  Test(zigzag);
}
// Round-trip of a 1000-point zigzag: one coordinate grows by 1e-1 per point while the
// other alternates between 0 and 1e-1.
UNIT_TEST(Traffic_Serialization_Zigzag100Km)
{
  size_t const kNumPoints = 1000;

  vector<TrafficGPSEncoder::DataPoint> zigzag;
  for (size_t i = 0; i < kNumPoints; ++i)
  {
    bool const isEven = i % 2 == 0;
    double const along = i * 1e-1;
    double const across = isEven ? 0 : 1e-1;
    zigzag.push_back(TrafficGPSEncoder::DataPoint(0, ms::LatLon(across, along), 0));
  }

  Test(zigzag);
}
// A hundred points evenly spread over a circle of radius 0.25 centred at the origin.
UNIT_TEST(Traffic_Serialization_Circle20KmRadius)
{
  size_t const kNumPoints = 100;
  double const kRadius = 0.25;

  vector<TrafficGPSEncoder::DataPoint> circle;
  for (size_t step = 0; step < kNumPoints; ++step)
  {
    double const angle = 2 * math::pi * step / kNumPoints;
    double const xCoord = kRadius * cos(angle);
    double const yCoord = kRadius * sin(angle);
    circle.push_back(TrafficGPSEncoder::DataPoint(0, ms::LatLon(yCoord, xCoord), 0));
  }
  Test(circle);
}
// Boundary values: (-90, -180) followed by (90, 180).
UNIT_TEST(Traffic_Serialization_ExtremeLatLon)
{
  using DataPoint = TrafficGPSEncoder::DataPoint;

  vector<DataPoint> corners;
  corners.push_back(DataPoint(0, ms::LatLon(-90, -180), 0));
  corners.push_back(DataPoint(0, ms::LatLon(90, 180), 0));
  Test(corners);
}
} // namespace traffic_test

View file

@ -0,0 +1,215 @@
#include "testing/testing.hpp"
#include "coding/url.hpp"
#include "base/math.hpp"
#include <queue>
#include <string>
#include <utility>
namespace url_tests
{
using namespace std;
using namespace url;
class TestUrl
{
public:
explicit TestUrl(string && url) : m_url(std::move(url)) {}
TestUrl & Scheme(string && scheme)
{
m_scheme = std::move(scheme);
return *this;
}
TestUrl & Host(string && host)
{
m_host = std::move(host);
return *this;
}
TestUrl & Path(string && path)
{
m_path = std::move(path);
return *this;
}
TestUrl & KV(string && key, string && value)
{
m_keyValuePairs.emplace(std::move(key), std::move(value));
return *this;
}
~TestUrl()
{
Url url(m_url);
TEST_EQUAL(url.GetScheme(), m_scheme, ());
TEST_EQUAL(url.GetHost(), m_host, ());
TEST_EQUAL(url.GetPath(), m_path, ());
TEST(!m_scheme.empty() || !url.IsValid(), ("Scheme is empty if and only if url is invalid!"));
url.ForEachParam([this](string const & name, string const & value)
{
TEST(!m_keyValuePairs.empty(), ("Failed for url = ", m_url));
TEST_EQUAL(m_keyValuePairs.front().first, name, ());
TEST_EQUAL(m_keyValuePairs.front().second, value, ());
m_keyValuePairs.pop();
});
}
private:
string m_url, m_scheme, m_host, m_path;
queue<pair<string, string>> m_keyValuePairs;
};
// Plain / percent-encoded string pairs shared by Url_Encode and Url_Decode:
// encN is the expected encoding of origN and origN is the expected decoding of encN.
char const * orig1 = "http://google.com/main_index.php";
char const * enc1 = "http%3A%2F%2Fgoogle.com%2Fmain_index.php";
char const * orig2 = "Some File Name.ext";
char const * enc2 = "Some%20File%20Name.ext";
// Two consecutive spaces after the comma are intentional: enc3 contains "%20%20".
char const * orig3 = "Wow,  two spaces?!";
char const * enc3 = "Wow%2C%20%20two%20spaces%3F%21";
char const * orig4 = "#$%^&@~[]{}()|*+`\"\'";
char const * enc4 = "%23%24%25%5E%26%40~%5B%5D%7B%7D%28%29%7C%2A%2B%60%22%27";
// Checks Join() on empty components, on components with leading/trailing slashes
// and on more than two components. The expected strings show exactly one '/' at
// every seam between non-empty parts.
UNIT_TEST(Url_Join)
{
// Empty components contribute nothing.
TEST_EQUAL("", Join("", ""), ());
TEST_EQUAL("omim/", Join("", "omim/"), ());
TEST_EQUAL("omim/", Join("omim/", ""), ());
// A separator is inserted when missing and not doubled when already present.
TEST_EQUAL("omim/strings", Join("omim", "strings"), ());
TEST_EQUAL("omim/strings", Join("omim/", "strings"), ());
TEST_EQUAL("../../omim/strings", Join("..", "..", "omim", "strings"), ());
TEST_EQUAL("../../omim/strings", Join("../", "..", "omim/", "strings"), ());
// Slashes on both sides of a seam collapse into one.
TEST_EQUAL("omim/strings", Join("omim/", "/strings"), ());
TEST_EQUAL("../../omim/strings", Join("../", "/../", "/omim/", "/strings"), ());
TEST_EQUAL("../omim/strings", Join("../", "", "/omim/", "/strings"), ());
}
// Table-driven check of UrlEncode(): every row is {input, expected encoded form}.
UNIT_TEST(Url_Encode)
{
  pair<char const *, char const *> const kCases[] = {
      {"", ""},
      {" ", "%20"},
      {"%% ", "%25%25%20"},
      {"20", "20"},
      {"Guinea-Bissau", "Guinea-Bissau"},
      {orig1, enc1},
      {orig2, enc2},
      {orig3, enc3},
      {orig4, enc4},
  };

  for (auto const & testCase : kCases)
    TEST_EQUAL(UrlEncode(testCase.first), testCase.second, ());
}
// Table-driven check of UrlDecode(): every row is {encoded input, expected plain text}.
// The last row expects '+' to be decoded as a space.
UNIT_TEST(Url_Decode)
{
  pair<char const *, char const *> const kCases[] = {
      {"", ""},
      {"%20", " "},
      {"%25%25%20", "%% "},
      {"20", "20"},
      {"Guinea-Bissau", "Guinea-Bissau"},
      {enc1, orig1},
      {enc2, orig2},
      {enc3, orig3},
      {enc4, orig4},
      {"123+Main+St,+Seattle,+WA+98101", "123 Main St, Seattle, WA 98101"},
  };

  for (auto const & testCase : kCases)
    TEST_EQUAL(UrlDecode(testCase.first), testCase.second, ());
}
// None of these inputs may be reported as a valid url.
UNIT_TEST(Url_Invalid)
{
  char const * const kMalformed[] = {"", ":/", "//"};
  for (char const * text : kMalformed)
    TEST(!Url(text).IsValid(), ());
}
// Well-formed urls. Each TestUrl temporary verifies its expectations in its
// destructor, i.e. at the end of the statement that builds it.
UNIT_TEST(Url_Valid)
{
// Percent-encoded parameter value ("%20") is expected to be decoded.
TestUrl("mapswithme://map?ll=10.3,12.3223&n=Hello%20World")
.Scheme("mapswithme")
.Host("map")
.KV("ll", "10.3,12.3223")
.KV("n", "Hello World");
// '&' before the '?' belongs to the host, not to the parameter list.
TestUrl("cm:M&M//path?q=q&w=w").Scheme("cm").Host("M&M").Path("path").KV("q", "q").KV("w", "w");
// A trailing '/' yields an empty path.
TestUrl("http://www.sandwichparlour.com.au/").Scheme("http").Host("www.sandwichparlour.com.au").Path("");
TestUrl("cm:/&test").Scheme("cm").Host("&test").Path("");
}
// Urls with a '#fragment' part: the expectations treat "key=value" pairs after '#'
// the same way as query parameters, following any pairs that come after '?'.
UNIT_TEST(Url_Fragment)
{
// Fragment only.
TestUrl("https://www.openstreetmap.org/way/179409926#map=19/46.34998/48.03213&layers=N")
.Scheme("https")
.Host("www.openstreetmap.org")
.Path("way/179409926")
.KV("map", "19/46.34998/48.03213")
.KV("layers", "N");
// Query followed by fragment.
TestUrl("https://www.openstreetmap.org/search?query=Falafel%20Sahyoun#map=16/33.89041/35.50664")
.Scheme("https")
.Host("www.openstreetmap.org")
.Path("search")
.KV("query", "Falafel Sahyoun")
.KV("map", "16/33.89041/35.50664");
}
// Broad sweep over scheme/host/path/parameter combinations.
// A TestUrl without any setter calls expects empty scheme, host and path and,
// through the check in ~TestUrl(), an url that is not valid.
UNIT_TEST(UrlScheme_Comprehensive)
{
// Empty input and scheme-only inputs.
TestUrl("");
TestUrl("scheme:").Scheme("scheme").Host("").Path("");
TestUrl("scheme:/").Scheme("scheme").Host("").Path("");
TestUrl("scheme://").Scheme("scheme").Host("").Path("");
// Inputs without a scheme.
TestUrl("sometext");
TestUrl(":noscheme");
TestUrl("://noscheme?");
// Host / path combinations, with and without a dangling '?'.
TestUrl("mwm://?").Scheme("mwm").Host("").Path("");
TestUrl("http://host/path/to/something").Scheme("http").Host("host").Path("path/to/something");
TestUrl("http://host?").Scheme("http").Host("host").Path("");
// Empty parameters between '&' are expected to be skipped.
TestUrl("maps://host?&&key=&").Scheme("maps").Host("host").KV("key", "");
TestUrl("mapswithme://map?ll=1.2,3.4&z=15")
.Scheme("mapswithme")
.Host("map")
.Path("")
.KV("ll", "1.2,3.4")
.KV("z", "15");
// Keys without '=' are expected with an empty value.
TestUrl("nopathnovalues://?key1&key2=val2")
.Scheme("nopathnovalues")
.Host("")
.Path("")
.KV("key1", "")
.KV("key2", "val2");
TestUrl("s://?key1&key2").Scheme("s").Host("").Path("").KV("key1", "").KV("key2", "");
TestUrl("g://h/p?key1=val1&key2=").Scheme("g").Host("h").Path("p").KV("key1", "val1").KV("key2", "");
// An empty key with a value is still reported.
TestUrl("g://h?=val1&key2=").Scheme("g").Host("h").Path("").KV("", "val1").KV("key2", "");
TestUrl("g://?k&key2").Scheme("g").Host("").Path("").KV("k", "").KV("key2", "");
// Percent-encoded '&' and '=' inside a key must not split it.
TestUrl("m:?%26Amp%26%3D%26Amp%26&name=%31%20%30")
.Scheme("m")
.Host("")
.Path("")
.KV("&Amp&=&Amp&", "")
.KV("name", "1 0");
// Repeated keys are expected one by one, in input order.
TestUrl("s://?key1=value1&key1=value2&key1=value3&key2&key2&key3=value1&key3&key3=value2")
.Scheme("s")
.Host("")
.Path("")
.KV("key1", "value1")
.KV("key1", "value2")
.KV("key1", "value3")
.KV("key2", "")
.KV("key2", "")
.KV("key3", "value1")
.KV("key3", "")
.KV("key3", "value2");
}
// Exercises the accessor API of url::Url directly, without the TestUrl helper.
UNIT_TEST(UrlApi_Smoke)
{
  url::Url parsed("https://2gis.ru/moscow/firm/4504127908589159?m=37.618632%2C55.760069%2F15.232");

  TEST_EQUAL(parsed.GetScheme(), "https", ());
  TEST_EQUAL(parsed.GetHost(), "2gis.ru", ());
  TEST_EQUAL(parsed.GetPath(), "moscow/firm/4504127908589159", ());
  TEST_EQUAL(parsed.GetHostAndPath(), "2gis.ru/moscow/firm/4504127908589159", ());

  // Only the presence of the parameter is checked, not its value.
  TEST(parsed.GetLastParam(), ());
  TEST(parsed.GetParamValue("m"), ());
}
} // namespace url_tests

View file

@ -0,0 +1,76 @@
#include "testing/testing.hpp"
#include "coding/reader.hpp"
#include "coding/value_opt_string.hpp"
#include "coding/writer.hpp"
#include <algorithm>
#include <string>
#include <vector>
namespace
{
template <class T>
void TestStringCodingT(T const * arr, size_t count, size_t maxSize)
{
for (size_t i = 0; i < count; ++i)
{
auto const ethalon = strings::to_string(arr[i]);
StringNumericOptimal s;
s.Set(ethalon);
std::vector<char> buffer;
MemWriter<std::vector<char>> w(buffer);
s.Write(w);
size_t const sz = buffer.size();
TEST_GREATER(sz, 0, ());
TEST_LESS_OR_EQUAL(sz, maxSize, ());
MemReader r(&buffer[0], sz);
ReaderSource<MemReader> src(r);
s.Read(src);
TEST_EQUAL(ethalon, s.Get(), ());
}
}
} // namespace
// "0" must fit into a single byte, whereas "01" is allowed up to three bytes.
UNIT_TEST(StringNumericOptimal_Zero)
{
  int const zero = 0;
  TestStringCodingT(&zero, 1, 1);  // should be coded as VarUint

  std::string const leadingZero = "01";
  TestStringCodingT(&leadingZero, 1, 3);  // should be coded as String
}
// Non-negative ints up to INT_MAX: each encoding must take at most five bytes.
UNIT_TEST(StringNumericOptimal_IntCoding1)
{
  int const nonNegative[] = {0, 1, 2, 666, 0x0FFFFFFF, 0x7FFFFFFF - 1, 0x7FFFFFFF};
  TestStringCodingT(nonNegative, ARRAY_SIZE(nonNegative), 5);  // should be coded as VarUint
}
// Negative ints: each encoding must take at most twelve bytes.
UNIT_TEST(StringNumericOptimal_IntCoding2)
{
  int const negative[] = {-1, -2, -666666, static_cast<int>(0xFFFFFFFE), static_cast<int>(0xFFFFFFFF)};
  TestStringCodingT(negative, ARRAY_SIZE(negative), 12);  // should be coded as String
}
// Texts that are not plain decimal numbers: each encoding must take at most twelve bytes.
UNIT_TEST(StringNumericOptimal_StringCoding)
{
  char const * const texts[] = {"xxx", "yyy", "a", "0xFFFFFF", "123456UL"};
  TestStringCodingT(texts, ARRAY_SIZE(texts), 12);  // should be coded as String
}
// A 10000-character string must be encoded in at most 10006 bytes.
UNIT_TEST(StringNumericOptimal_LargeStringCoding)
{
  // Build the string with the (count, char) constructor instead of unqualified
  // fill_n/back_inserter, which compiled only through argument-dependent lookup.
  std::string const s(10000, 'x');
  TestStringCodingT(&s, 1, 10006);
}

View file

@ -0,0 +1,69 @@
#include "testing/testing.hpp"
#include "coding/reader.hpp"
#include "coding/var_record_reader.hpp"
#include "coding/varint.hpp"
#include "coding/writer.hpp"
#include "base/macros.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
using namespace std;
namespace
{
// Callback for VarRecordReader::ForEachRecord(): appends every (position, record bytes
// converted to a string) pair to the vector it was constructed with.
struct SaveForEachParams
{
  explicit SaveForEachParams(vector<pair<uint64_t, string>> & data) : m_data(data) {}

  void operator()(uint64_t pos, vector<uint8_t> && data) const
  {
    string text(data.begin(), data.end());
    m_data.push_back(make_pair(pos, text));
  }

  // Destination owned by the caller; must outlive this functor.
  vector<pair<uint64_t, string>> & m_data;
};
} // namespace
// Writes three length-prefixed records ("abc", a string longer than 128 bytes, "defg")
// and reads them back both by position and through ForEachRecord().
UNIT_TEST(VarRecordReader_Simple)
{
  char const longString[] =
      "0123456789012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789012345";
  size_t const longStringSize = sizeof(longString) - 1;
  // Longer than 128, so its length prefix occupies two bytes (see the offsets below).
  TEST_GREATER(longStringSize, 128, ());

  // Byte offset of the last record's length prefix.
  size_t const lastRecordPos = 6 + longStringSize;

  vector<uint8_t> data;
  {
    MemWriter<vector<uint8_t>> writer(data);
    WriteVarUint(writer, 3U);                  // 0
    writer.Write("abc", 3);                    // 1
    WriteVarUint(writer, longStringSize);      // 4
    writer.Write(longString, longStringSize);  // 6
    WriteVarUint(writer, 4U);                  // 6 + longStringSize
    writer.Write("defg", 4);                   // 7 + longStringSize
                                               // 11 + longStringSize
  }

  MemReader reader(data.data(), data.size());
  VarRecordReader<MemReader> recordReader(reader);

  auto const asString = [](auto const & record) { return string(record.begin(), record.end()); };

  // Random access, deliberately out of order.
  TEST_EQUAL(asString(recordReader.ReadRecord(0)), "abc", ());
  TEST_EQUAL(asString(recordReader.ReadRecord(lastRecordPos)), "defg", ());
  TEST_EQUAL(asString(recordReader.ReadRecord(4)), longString, ());

  // Sequential traversal must report every record with its start offset.
  vector<pair<uint64_t, string>> forEachCalls;
  recordReader.ForEachRecord(SaveForEachParams(forEachCalls));
  vector<pair<uint64_t, string>> expectedForEachCalls = {{0, "abc"}, {4, longString}, {lastRecordPos, "defg"}};
  TEST_EQUAL(forEachCalls, expectedForEachCalls, ());
}

View file

@ -0,0 +1,118 @@
#include "testing/testing.hpp"
#include "coding/byte_stream.hpp"
#include "coding/hex.hpp"
#include "coding/reader.hpp"
#include "coding/var_serial_vector.hpp"
#include "coding/writer.hpp"
#include "base/macros.hpp"
#include <cstddef>
#include <cstdint>
#include <random>
#include <string>
#include <vector>
using namespace std;
// Hex dump of the serialized form of {"a", "bcd", "ef"} that the tests below expect:
// the element count, then one cumulative end offset per element (each written as a
// 4-byte value), then the concatenated element bytes.
char const kHexSerial[] =
"03000000"       // count = 3
"01000000"       // end offset of "a"
"04000000"       // end offset of "bcd"
"06000000"       // end offset of "ef"
"616263646566";  // "abcdef"
namespace
{
// Reference serializer used by the tests. Output layout:
//   uint32 element count, then for every element its cumulative end offset in bytes
//   (uint32), then the raw bytes of all elements back to back.
// Requires iterators supporting `end - begin` and elements exposing size()/operator[].
template <typename ItT, typename TDstStream>
void WriteVarSerialVector(ItT begin, ItT end, TDstStream & dst)
{
  // Header: number of elements.
  WriteToSink(dst, static_cast<uint32_t>(end - begin));

  // Offsets table: running total of element sizes in bytes.
  uint32_t runningOffset = 0;
  for (ItT cur = begin; cur != end; ++cur)
  {
    runningOffset += cur->size() * sizeof((*cur)[0]);
    WriteToSink(dst, runningOffset);
  }

  // Payload: empty elements are skipped because there is no first element to point at.
  for (ItT cur = begin; cur != end; ++cur)
  {
    typename ItT::value_type const & element = *cur;
    if (element.empty())
      continue;
    dst.Write(&element[0], element.size() * sizeof(element[0]));
  }
}
} // namespace
// The reference serializer must produce exactly kHexSerial for {"a", "bcd", "ef"}.
UNIT_TEST(WriteSerial)
{
  vector<string> const elements = {"a", "bcd", "ef"};

  string serialized;
  PushBackByteSink<string> sink(serialized);
  WriteVarSerialVector(elements.begin(), elements.end(), sink);

  TEST_EQUAL(ToHex(serialized), kHexSerial, ());
}
// VarSerialVectorWriter must produce the same bytes as the reference serializer when
// the three records are written one by one and each is closed with FinishRecord().
UNIT_TEST(WriteSerialWithWriter)
{
  string const kRecords[] = {"a", "bcd", "ef"};

  string output;
  MemWriter<string> writer(output);
  VarSerialVectorWriter<MemWriter<string>> recordWriter(writer, 3);
  for (string const & record : kRecords)
  {
    writer.Write(record.data(), record.size());
    recordWriter.FinishRecord();
  }

  TEST_EQUAL(ToHex(output), kHexSerial, ());
}
// Decodes kHexSerial and checks random access to each of the three elements.
UNIT_TEST(ReadSerial)
{
  string bytes(FromHex(string(kHexSerial)));
  MemReader memReader(bytes.data(), bytes.size());
  ReaderSource<MemReader> memSource(memReader);
  VarSerialVectorReader<MemReader> vectorReader(memSource);

  TEST_EQUAL(vectorReader.Read(0), "a", ());
  TEST_EQUAL(vectorReader.Read(1), "bcd", ());
  TEST_EQUAL(vectorReader.Read(2), "ef", ());
}
// Round trip of 1024 pseudo-random lowercase words (1..20 chars, fixed seed 0):
// serialized with the reference writer, then read back element by element.
UNIT_TEST(EncodeDecode)
{
  mt19937 rng(0);

  vector<string> elements;
  for (size_t n = 0; n < 1024; ++n)
  {
    string word(1 + (rng() % 20), '\0');
    for (char & letter : word)
      letter = static_cast<char>(rng() % 26) + 'a';
    elements.push_back(word);
  }

  string serial;
  PushBackByteSink<string> sink(serial);
  WriteVarSerialVector(elements.begin(), elements.end(), sink);

  MemReader memReader(serial.c_str(), serial.size());
  ReaderSource<MemReader> memSource(memReader);
  VarSerialVectorReader<MemReader> reader(memSource);

  for (size_t idx = 0; idx < elements.size(); ++idx)
    TEST_EQUAL(reader.Read(static_cast<uint32_t>(idx)), elements[idx], ());
}

View file

@ -0,0 +1,214 @@
#include "testing/testing.hpp"
#include "coding/byte_stream.hpp"
#include "coding/reader.hpp"
#include "coding/varint.hpp"
#include "base/macros.hpp"
#include "base/stl_helpers.hpp"
#include <vector>
using namespace std;
namespace
{
template <typename T>
void TestVarUint(T const x)
{
vector<unsigned char> data;
PushBackByteSink<vector<uint8_t>> dst(data);
WriteVarUint(dst, x);
ArrayByteSource src(&data[0]);
TEST_EQUAL(ReadVarUint<T>(src), x, ());
size_t const bytesRead = src.PtrUint8() - data.data();
TEST_EQUAL(bytesRead, data.size(), (x));
}
template <typename T>
void TestVarInt(T const x)
{
vector<uint8_t> data;
PushBackByteSink<vector<uint8_t>> dst(data);
WriteVarInt(dst, x);
ArrayByteSource src(&data[0]);
TEST_EQUAL(ReadVarInt<T>(src), x, ());
size_t const bytesRead = src.PtrUint8() - data.data();
TEST_EQUAL(bytesRead, data.size(), (x));
}
} // namespace
// Round trip of the value 0 for the 32- and 64-bit unsigned types.
// The 8- and 16-bit variants are disabled (commented out) in the original source.
UNIT_TEST(VarUint0)
{
// TestVarUint(static_cast<uint8_t>(0));
// TestVarUint(static_cast<uint16_t>(0));
TestVarUint(static_cast<uint32_t>(0));
TestVarUint(static_cast<uint64_t>(0));
}
// Round trip of the all-ones value (-1 converted to unsigned) for the 32- and 64-bit types.
// The 8- and 16-bit variants are disabled (commented out) in the original source.
UNIT_TEST(VarUintMinus1)
{
// TestVarUint(static_cast<uint8_t>(-1));
// TestVarUint(static_cast<uint16_t>(-1));
TestVarUint(static_cast<uint32_t>(-1));
TestVarUint(static_cast<uint64_t>(-1));
}
// Round-trips uint32 values in a window [2^b - 3, 2^b + 147] around every power of
// two, b = 0..32, clipped to the uint32 range.
UNIT_TEST(VarUint32)
{
  for (int b = 0; b <= 32; ++b)
  {
    uint64_t const pivot = 1ULL << b;
    // Clamp the lower bound at 0. For b < 2 the unsigned expression `pivot - 3`
    // wraps around to a huge value, which made the loop body unreachable.
    uint64_t const first = pivot >= 3 ? pivot - 3 : 0;
    for (uint64_t i = first; i <= uint32_t(-1) && i <= pivot + 147; ++i)
      TestVarUint(static_cast<uint32_t>(i));
  }
}
// Round-trips int32 values obtained from a window [2^b - 3, 2^b + 147] around every
// power of two (both the value and its negation, converted to int32), followed by
// dense sweeps over [-10000, 10000] (signed) and [0, 10000] (unsigned).
UNIT_TEST(VarInt32)
{
  for (int b = 0; b <= 32; ++b)
  {
    uint64_t const pivot = 1ULL << b;
    // Clamp the lower bound at 0. For b < 2 the unsigned expression `pivot - 3`
    // wraps around to a huge value, which made the loop body unreachable.
    uint64_t const first = pivot >= 3 ? pivot - 3 : 0;
    for (uint64_t i = first; i <= uint32_t(-1) && i <= pivot + 147; ++i)
    {
      TestVarInt(static_cast<int32_t>(i));
      TestVarInt(static_cast<int32_t>(-i));
    }
  }

  int const bound = 10000;
  for (int i = -bound; i <= bound; ++i)
    TestVarInt(static_cast<int32_t>(i));
  for (int i = 0; i <= bound; ++i)
    TestVarUint(static_cast<uint32_t>(i));
}
// Checks the encoded length of a few signed values: +-60 take one byte, +-120 take two.
UNIT_TEST(VarIntSize)
{
  struct Case
  {
    int m_value;
    size_t m_expectedSize;
  };
  Case const kCases[] = {{60, 1}, {-60, 1}, {120, 2}, {-120, 2}};

  vector<unsigned char> data;
  PushBackByteSink<vector<unsigned char>> dst(data);
  for (Case const & testCase : kCases)
  {
    // The sink appends to |data|, so start every case from an empty buffer.
    data.clear();
    WriteVarInt(dst, testCase.m_value);
    TEST_EQUAL(data.size(), testCase.m_expectedSize, ());
  }
}
// Round trip of the extreme values of the 32/64-bit unsigned and signed types.
// The int64 minimum is disabled (commented out) in the original source.
UNIT_TEST(VarIntMax)
{
TestVarUint(uint32_t(-1));
TestVarUint(uint64_t(-1));
TestVarInt(int32_t(2147483647));
TestVarInt(int32_t(-2147483648LL));
TestVarInt(int64_t(9223372036854775807LL));
// TestVarInt(int64_t(-9223372036854775808LL));
}
// Both ReadVarInt64Array overloads must accept an empty input: nothing is appended
// to |result| and the returned end pointer is null.
UNIT_TEST(ReadVarInt64Array_EmptyArray)
{
  vector<int64_t> result;

  // Overload taking [begin, end) pointers. The explicit pointer type on the second
  // argument keeps overload resolution away from the (pointer, count) variant.
  void const * pEnd =
      ReadVarInt64Array(nullptr, static_cast<void const *>(nullptr), base::MakeBackInsertFunctor(result));
  TEST_EQUAL(result, vector<int64_t>(), ("UntilBufferEnd"));
  TEST_EQUAL(reinterpret_cast<uintptr_t>(pEnd), 0, ("UntilBufferEnd"));

  // Overload taking a pointer and an element count.
  pEnd = ReadVarInt64Array(nullptr, size_t{0}, base::MakeBackInsertFunctor(result));
  TEST_EQUAL(result, vector<int64_t>(), ("GivenSize"));
  TEST_EQUAL(reinterpret_cast<uintptr_t>(pEnd), 0, ("GivenSize"));
}
// Exhaustive check of both ReadVarInt64Array overloads: every non-empty subset of a
// small set of boundary values is encoded with WriteVarInt and decoded back, once
// bounded by an end pointer and once by an element count.
UNIT_TEST(ReadVarInt64Array)
{
vector<int64_t> values;
// Fill in values: each base value and its negation, sorted and de-duplicated
// (0 equals its negation; 0xFFFFFFFF equals (2LL << 31) - 1).
{
int64_t const baseValues[] = {0,
127,
128,
(2 << 28) - 1,
(2 << 28),
(2LL << 31),
(2LL << 31) - 1,
0xFFFFFFFF - 1,
0xFFFFFFFF,
0xFFFFFFFFFFULL};
for (size_t i = 0; i < ARRAY_SIZE(baseValues); ++i)
{
values.push_back(baseValues[i]);
values.push_back(-baseValues[i]);
}
sort(values.begin(), values.end());
values.erase(unique(values.begin(), values.end()), values.end());
}
// Test all subsets: bit j of the mask |i| selects values[j].
for (size_t i = 1; i < 1U << values.size(); ++i)
{
vector<int64_t> testValues;
for (size_t j = 0; j < values.size(); ++j)
if (i & (1 << j))
testValues.push_back(values[j]);
// Encode the selected values back to back.
vector<unsigned char> data;
{
PushBackByteSink<vector<unsigned char>> dst(data);
for (size_t j = 0; j < testValues.size(); ++j)
WriteVarInt(dst, testValues[j]);
}
ASSERT_GREATER(data.size(), 0, ());
// Variant 1: decode until the end pointer is reached.
{
// Factor out variables here to show the obvious compiler error.
// clang 3.5, loop optimization.
/// @todo Need to check with the new XCode (and clang) update.
void const * pDataStart = &data[0];
void const * pDataEnd = &data[0] + data.size();
vector<int64_t> result;
void const * pEnd = ReadVarInt64Array(pDataStart, pDataEnd, base::MakeBackInsertFunctor(result));
TEST_EQUAL(pEnd, pDataEnd, ("UntilBufferEnd", data.size()));
TEST_EQUAL(result, testValues, ("UntilBufferEnd", data.size()));
}
// Variant 2: decode a given number of values; the returned pointer must land on the buffer end.
{
vector<int64_t> result;
void const * pEnd = ReadVarInt64Array(&data[0], testValues.size(), base::MakeBackInsertFunctor(result));
TEST_EQUAL(pEnd, &data[0] + data.size(), ("GivenSize", data.size()));
TEST_EQUAL(result, testValues, ("GivenSize", data.size()));
}
}
}
// Round trip of short sorted uint32 arrays through
// WriteVarUInt32SortedShortArray / ReadVarUInt32SortedShortArray.
UNIT_TEST(VarInt_ShortSortedArray)
{
  uint32_t constexpr kMaxValue = (uint32_t(1) << 30) - 1;

  std::vector<std::vector<uint32_t>> const testCases = {
      {0},
      {10, 10000},
      {kMaxValue - 2, kMaxValue - 1, kMaxValue},
  };

  for (auto const & expected : testCases)
  {
    std::vector<uint8_t> encoded;
    PushBackByteSink sink(encoded);
    WriteVarUInt32SortedShortArray(expected, sink);

    MemReader reader(encoded.data(), encoded.size());
    ReaderSource source(reader);
    std::vector<uint32_t> decoded;
    ReadVarUInt32SortedShortArray(source, decoded);

    TEST_EQUAL(expected, decoded, ());
  }
}

View file

@ -0,0 +1,259 @@
#include "testing/testing.hpp"
#include "coding/buffered_file_writer.hpp"
#include "coding/file_reader.hpp"
#include "coding/file_writer.hpp"
#include "coding/internal/file_data.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
using namespace std;
namespace
{
// Content every writer is expected to hold after TestWrite() has run on it.
static char const kTestWriteStr[] = "01234567";
// Drives |writer| through a fixed script of Write()/Seek() calls, checking Pos()
// after most steps. The script overwrites earlier bytes and seeks past the current
// end, and finishes with the content "01234567" (kTestWriteStr). Trailing comments
// show the expected content after each write; '?' marks bytes not written yet.
template <class WriterT>
void TestWrite(WriterT & writer)
{
writer.Write("01", 2); // "01"
TEST_EQUAL(writer.Pos(), 2, ());
writer.Write("x", 1); // "01x"
TEST_EQUAL(writer.Pos(), 3, ());
writer.Write("3", 1); // "01x3"
TEST_EQUAL(writer.Pos(), 4, ());
// Go back and overwrite the placeholder 'x'.
writer.Seek(2);
TEST_EQUAL(writer.Pos(), 2, ());
writer.Write("2", 1); // "0123"
TEST_EQUAL(writer.Pos(), 3, ());
// Seek beyond the current end, leaving a gap at positions 4..6.
writer.Seek(7);
TEST_EQUAL(writer.Pos(), 7, ());
writer.Write("7", 1); // "0123???7"
TEST_EQUAL(writer.Pos(), 8, ());
// Fill the gap.
writer.Seek(4);
TEST_EQUAL(writer.Pos(), 4, ());
writer.Write("45", 2); // "012345?7"
writer.Write("6", 1); // "01234567"
}
} // namespace
// Runs the TestWrite() script against a MemWriter backed by a vector<char>.
UNIT_TEST(MemWriter_Smoke)
{
  vector<char> buffer;
  MemWriter<vector<char>> memWriter(buffer);
  TestWrite(memWriter);

  string const written(buffer.begin(), buffer.end());
  TEST_EQUAL(written, kTestWriteStr, ());
}
// Runs the TestWrite() script against a FileWriter, reads the file back with
// FileReader and removes it afterwards.
UNIT_TEST(FileWriter_Smoke)
{
  char const fileName[] = "file_writer_smoke_test.tmp";

  // The writer is scoped so that it is destroyed before the file is read.
  {
    FileWriter fileWriter(fileName);
    TestWrite(fileWriter);
  }

  vector<char> contents;
  {
    FileReader fileReader(fileName);
    contents.resize(fileReader.Size());
    fileReader.Read(0, contents.data(), fileReader.Size());
  }

  TEST_EQUAL(string(contents.begin(), contents.end()), kTestWriteStr, ());
  FileWriter::DeleteFileX(fileName);
}
// Runs the TestWrite() script through a SubWriter placed between a two-byte prefix
// and a two-byte suffix written directly to the underlying MemWriter.
UNIT_TEST(SubWriter_MemWriter_Smoke)
{
  vector<char> buffer;
  MemWriter<vector<char>> memWriter(buffer);

  memWriter.Write("aa", 2);
  {
    SubWriter<MemWriter<vector<char>>> nested(memWriter);
    TestWrite(nested);
  }
  memWriter.Write("bb", 2);

  string const expected = "aa" + string(kTestWriteStr) + "bb";
  TEST_EQUAL(string(buffer.begin(), buffer.end()), expected, ());
}
// Same as SubWriter_MemWriter_Smoke, but the underlying writer is a FileWriter and
// the result is verified by reading the file back.
UNIT_TEST(SubWriter_FileWriter_Smoke)
{
  char const fileName[] = "sub_file_writer_smoke_test.tmp";

  {
    FileWriter fileWriter(fileName);
    fileWriter.Write("aa", 2);
    {
      SubWriter<FileWriter> nested(fileWriter);
      TestWrite(nested);
    }
    fileWriter.Write("bb", 2);
  }

  vector<char> contents;
  {
    FileReader fileReader(fileName);
    contents.resize(fileReader.Size());
    fileReader.Read(0, contents.data(), fileReader.Size());
  }

  string const expected = "aa" + string(kTestWriteStr) + "bb";
  TEST_EQUAL(string(contents.begin(), contents.end()), expected, ());
  FileWriter::DeleteFileX(fileName);
}
// After FileWriter::DeleteFileX() the file must be gone: opening it with FileReader
// has to throw FileReader::OpenException.
UNIT_TEST(FileWriter_DeleteFile)
{
  char const fileName[] = "delete_file_test";

  {
    FileWriter fileWriter(fileName);
    fileWriter.Write("123", 3);
  }
  {
    FileReader fileReader(fileName);
    TEST_EQUAL(fileReader.Size(), 3, ());
  }

  FileWriter::DeleteFileX(fileName);

  bool openFailed = false;
  try
  {
    FileReader fileReader(fileName);
  }
  catch (FileReader::OpenException &)
  {
    openFailed = true;
  }
  TEST(openFailed, ("Exception should be thrown!"));
}
// Walks one file through the FileWriter open modes used here (default,
// OP_WRITE_EXISTING, OP_APPEND) and checks size/content after every stage.
UNIT_TEST(FileWriter_AppendAndOpenExisting)
{
char const fileName[] = "append_openexisting_file_test";
// Stage 1: default mode creates the (empty) file.
{
FileWriter writer(fileName);
}
// Stage 2: OP_WRITE_EXISTING on the empty file, then write four bytes.
{
FileWriter writer(fileName, FileWriter::OP_WRITE_EXISTING);
TEST_EQUAL(writer.Size(), 0, ());
writer.Write("abcd", 4);
}
{
FileReader reader(fileName);
TEST_EQUAL(reader.Size(), 4, ());
string s(static_cast<uint32_t>(reader.Size()), 0);
reader.Read(0, &s[0], s.size());
TEST_EQUAL(s, "abcd", ());
}
// Stage 3: default mode again; the expected size of 3 shows the old content is discarded.
{
FileWriter writer(fileName);
writer.Write("123", 3);
}
{
FileReader reader(fileName);
TEST_EQUAL(reader.Size(), 3, ());
}
// Stage 4: OP_APPEND adds to the end of the existing content.
{
FileWriter writer(fileName, FileWriter::OP_APPEND);
writer.Write("4", 1);
}
{
FileReader reader(fileName);
TEST_EQUAL(reader.Size(), 4, ());
string s(static_cast<uint32_t>(reader.Size()), 0);
reader.Read(0, &s[0], s.size());
TEST_EQUAL(s, "1234", ());
}
// Stage 5: OP_WRITE_EXISTING keeps the content and overwrites from the beginning.
{
FileWriter writer(fileName, FileWriter::OP_WRITE_EXISTING);
TEST_EQUAL(writer.Size(), 4, ());
writer.Write("56", 2);
}
{
FileReader reader(fileName);
TEST_EQUAL(reader.Size(), 4, ());
string s(static_cast<uint32_t>(reader.Size()), 0);
reader.Read(0, &s[0], 4);
TEST_EQUAL(s, "5634", ());
}
FileWriter::DeleteFileX(fileName);
}
// Parameters of the chunked-write tests below: the data consists of CHUNKS_COUNT
// chunks of CHUNK_SIZE bytes each, followed by TEST_STRING.
size_t const CHUNK_SIZE = 1024;
size_t const CHUNKS_COUNT = 21;
string const TEST_STRING = "Some Test String";
// First pass: seeks past the whole chunk area and writes TEST_STRING right after it.
void WriteTestData1(Writer & w)
{
  size_t const tailOffset = CHUNKS_COUNT * CHUNK_SIZE;
  w.Seek(tailOffset);
  w.Write(TEST_STRING.data(), TEST_STRING.size());
}
// Second pass: fills every chunk with bytes equal to its index. Odd-numbered chunks
// are written first and even-numbered ones second, so writes are out of order.
void WriteTestData2(Writer & w)
{
  auto const writeChunk = [&w](size_t chunkIdx)
  {
    char chunk[CHUNK_SIZE];
    for (char & byte : chunk)
      byte = chunkIdx;
    w.Seek(chunkIdx * CHUNK_SIZE);
    w.Write(&chunk[0], ARRAY_SIZE(chunk));
  };

  for (size_t odd = 1; odd < CHUNKS_COUNT; odd += 2)
    writeChunk(odd);
  for (size_t even = 0; even < CHUNKS_COUNT; even += 2)
    writeChunk(even);
}
// Reads everything from |r| and verifies the layout produced by WriteTestData1/2:
// chunk i consists of bytes equal to i, and TEST_STRING follows the last chunk.
void ReadTestData(Reader & r)
{
  string content;
  r.ReadAsString(content);

  size_t pos = 0;
  for (size_t i = 0; i < CHUNKS_COUNT; ++i)
  {
    char const expectedByte = static_cast<char>(i);
    for (size_t j = 0; j < CHUNK_SIZE; ++j, ++pos)
      TEST_EQUAL(content[pos], expectedByte, (i, j));
  }

  // |pos| now equals CHUNKS_COUNT * CHUNK_SIZE, i.e. the start of the tail.
  string const tail = content.substr(pos);
  TEST_EQUAL(tail, TEST_STRING, (tail, TEST_STRING));
}
template <typename WriterType>
void WriteToFileAndTest()
{
string const TEST_FILE = "FileWriter_Chunks.test";
{
WriterType fileWriter(TEST_FILE, FileWriter::OP_WRITE_TRUNCATE);
WriteTestData1(fileWriter);
}
{
WriterType fileWriter(TEST_FILE, FileWriter::OP_WRITE_EXISTING);
WriteTestData2(fileWriter);
}
{
FileReader r(TEST_FILE);
ReadTestData(r);
}
FileWriter::DeleteFileX(TEST_FILE);
}
// Two-pass chunked write scenario with FileWriter.
UNIT_TEST(FileWriter_Chunks)
{
WriteToFileAndTest<FileWriter>();
}
// Two-pass chunked write scenario with BufferedFileWriter.
UNIT_TEST(BufferedFileWriter_Smoke)
{
WriteToFileAndTest<BufferedFileWriter>();
}
// Two-pass chunked write scenario against an in-memory string: each pass uses a
// fresh MemWriter over the same buffer.
UNIT_TEST(MemWriter_Chunks)
{
  string storage;

  {
    MemWriter<string> firstPass(storage);
    WriteTestData1(firstPass);
  }
  {
    MemWriter<string> secondPass(storage);
    WriteTestData2(secondPass);
  }
  {
    MemReader memReader(storage.data(), storage.size());
    ReadTestData(memReader);
  }
}

View file

@ -0,0 +1,119 @@
#include "testing/testing.hpp"
#include "coding/parse_xml.hpp"
#include "coding/reader.hpp"
#include <string>
#include <vector>
namespace
{
// Minimal document: a single empty <root> element.
std::string const smokeXml = R"(
<root>
</root>
)";
// Nested document with attributes; XmlParser_LongTest checks the order of
// element pushes/pops and attribute callbacks produced while parsing it.
std::string const longXml = R"(
<root>
<ruler>
<portrait>
<anchor vertical="bottom" horizontal="left"/>
<offset x="10"/>
</portrait>
</ruler>
<compass>
<portrait>
<anchor vertical="center"/>
<relative vertical="top"/>
</portrait>
<landscape>
<relative vertical="top"/>
<offset x="34" y="48"/>
</landscape>
</compass>
</root>
)";
// Dispatcher that expects a document consisting of a single "root" element:
// every pushed and popped tag must be "root"; text and attributes are ignored.
// NOTE(review): no test visible in this file instantiates this class.
class SmokeDispatcher
{
public:
  void CharData(std::string const &) {}

  void AddAttr(char const *, char const *) {}

  bool Push(std::string_view tag)
  {
    TEST_EQUAL(tag, "root", ());
    return true;
  }

  void Pop(std::string_view tag)
  {
    TEST_EQUAL(tag, "root", ());
  }
};
// Recording dispatcher: stores every attribute, pushed tag and popped tag in the
// order the parser reports them, and offers Test*() helpers that compare the
// recorded sequences with expected ones. Character data is ignored.
class Dispatcher
{
public:
  using PairsOfStrings = std::vector<std::pair<std::string, std::string>>;
  using Strings = std::vector<std::string>;

  void CharData(std::string const &) {}

  void AddAttr(std::string key, std::string value)
  {
    m_addAttrs.push_back({std::move(key), std::move(value)});
  }

  bool Push(std::string push)
  {
    m_pushes.push_back(std::move(push));
    return true;
  }

  void Pop(std::string pop)
  {
    m_pops.push_back(std::move(pop));
  }

  void TestAddAttrs(PairsOfStrings const & addAttrs) { TestEquality(m_addAttrs, addAttrs); }
  void TestPushes(Strings const & pushes) { TestEquality(m_pushes, pushes); }
  void TestPops(Strings const & pops) { TestEquality(m_pops, pops); }

private:
  // Compares sizes first, then the sequences element by element; the failing index
  // is attached to the test message.
  template <typename F>
  void TestEquality(F const & actual, F const & expected)
  {
    TEST_EQUAL(actual.size(), expected.size(), ());
    for (size_t idx = 0; idx < actual.size(); ++idx)
      TEST_EQUAL(actual[idx], expected[idx], (idx));
  }

  PairsOfStrings m_addAttrs;
  Strings m_pushes;
  Strings m_pops;
};
template <typename D>
void TestXML(std::string const & xmlStr, D & dispatcher)
{
std::vector<uint8_t> xml(xmlStr.cbegin(), xmlStr.cend());
MemReader reader(xml.data(), xml.size());
ReaderSource<MemReader> source(reader);
ParseXML(source, dispatcher);
}
// Parsing smokeXml must report exactly one push and one pop of "root" and no attributes.
UNIT_TEST(XmlParser_SmokeTest)
{
  Dispatcher dispatcher;
  TestXML(smokeXml, dispatcher);

  Dispatcher::Strings const rootOnly = {"root"};
  dispatcher.TestAddAttrs({});
  dispatcher.TestPushes(rootOnly);
  dispatcher.TestPops(rootOnly);
}
// Parsing longXml must report attributes, pushes and pops in document order.
UNIT_TEST(XmlParser_LongTest)
{
  Dispatcher dispatcher;
  TestXML(longXml, dispatcher);

  Dispatcher::PairsOfStrings const expectedAttrs = {
      {"vertical", "bottom"}, {"horizontal", "left"}, {"x", "10"}, {"vertical", "center"},
      {"vertical", "top"},    {"vertical", "top"},    {"x", "34"}, {"y", "48"},
  };
  Dispatcher::Strings const expectedPushes = {
      "root",   "ruler",  "portrait", "anchor",    "offset",   "compass",
      "portrait", "anchor", "relative", "landscape", "relative", "offset",
  };
  Dispatcher::Strings const expectedPops = {
      "anchor",   "offset",   "portrait", "ruler",     "anchor",  "relative",
      "portrait", "relative", "offset",   "landscape", "compass", "root",
  };

  dispatcher.TestAddAttrs(expectedAttrs);
  dispatcher.TestPushes(expectedPushes);
  dispatcher.TestPops(expectedPops);
}
} // namespace

View file

@ -0,0 +1,154 @@
#include "testing/testing.hpp"
#include "coding/constants.hpp"
#include "coding/file_writer.hpp"
#include "coding/internal/file_data.hpp"
#include "coding/zip_creator.hpp"
#include "coding/zip_reader.hpp"
#include "base/scope_guard.hpp"
#include <string>
#include <vector>
namespace
{
// Zips the single file |filePath| into |zipPath| with default compression, checks
// the listed size of the first archive entry, unzips it and compares the result
// with the source file. On success all three files (source, archive, unzipped copy)
// are deleted.
void CreateAndTestZip(std::string const & filePath, std::string const & zipPath)
{
  TEST(CreateZipFromFiles({filePath}, zipPath, CompressionLevel::DefaultCompression), ());

  ZipFileReader::FileList files;
  ZipFileReader::FilesList(zipPath, files);
  // Fail the test instead of indexing into an empty list below.
  TEST(!files.empty(), ("No entries listed for", zipPath));
  TEST_EQUAL(files[0].second, FileReader(filePath).Size(), ());

  std::string const unzippedFile = "unzipped.tmp";
  ZipFileReader::UnzipFile(zipPath, files[0].first, unzippedFile);
  TEST(base::IsEqualFiles(filePath, unzippedFile), ());

  TEST(base::DeleteFileX(filePath), ());
  TEST(base::DeleteFileX(zipPath), ());
  TEST(base::DeleteFileX(unzippedFile), ());
}
void CreateAndTestZip(std::vector<std::string> const & files, std::string const & zipPath, CompressionLevel compression)
{
TEST(CreateZipFromFiles(files, zipPath, compression), ());
ZipFileReader::FileList fileList;
ZipFileReader::FilesList(zipPath, fileList);
std::string const unzippedFile = "unzipped.tmp";
for (size_t i = 0; i < files.size(); ++i)
{
TEST_EQUAL(fileList[i].second, FileReader(files[i]).Size(), ());
ZipFileReader::UnzipFile(zipPath, fileList[i].first, unzippedFile);
TEST(base::IsEqualFiles(files[i], unzippedFile), ());
TEST(base::DeleteFileX(unzippedFile), ());
}
TEST(base::DeleteFileX(zipPath), ());
}
void CreateAndTestZipWithFolder(std::vector<std::string> const & files, std::vector<std::string> const & filesInArchive,
std::string const & zipPath, CompressionLevel compression)
{
TEST(CreateZipFromFiles(files, filesInArchive, zipPath, compression), ());
ZipFileReader::FileList fileList;
ZipFileReader::FilesList(zipPath, fileList);
std::string const unzippedFile = "unzipped.tmp";
for (size_t i = 0; i < files.size(); ++i)
{
TEST_EQUAL(fileList[i].second, FileReader(files[i]).Size(), ());
ZipFileReader::UnzipFile(zipPath, fileList[i].first, unzippedFile);
TEST(base::IsEqualFiles(files[i], unzippedFile), ());
TEST(base::DeleteFileX(unzippedFile), ());
}
TEST(base::DeleteFileX(zipPath), ());
}
std::vector<CompressionLevel> GetCompressionLevels()
{
return {CompressionLevel::DefaultCompression, CompressionLevel::BestCompression, CompressionLevel::BestSpeed,
CompressionLevel::NoCompression};
}
} // namespace
// Source file one byte larger than READ_FILE_BUFFER_SIZE.
UNIT_TEST(CreateZip_BigFile)
{
  std::string const sourcePath = "testfileforzip.txt";
  {
    std::string const payload(READ_FILE_BUFFER_SIZE + 1, '1');
    FileWriter sourceFile(sourcePath);
    sourceFile.Write(payload.c_str(), payload.size());
  }
  CreateAndTestZip(sourcePath, "testzip.zip");
}
// Small source file whose content is its own name.
UNIT_TEST(CreateZip_Smoke)
{
  std::string const sourcePath = "testfileforzip.txt";
  {
    FileWriter sourceFile(sourcePath);
    sourceFile.Write(sourcePath.c_str(), sourcePath.size());
  }
  CreateAndTestZip(sourcePath, "testzip.zip");
}
// Three source files (each containing its own name) are zipped together once per
// compression level. The scope guard removes the source files when the test ends.
UNIT_TEST(CreateZip_MultipleFiles)
{
  std::vector<std::string> const fileData{"testf1", "testfile2", "testfile3_longname.txt.xml.csv"};
  SCOPE_GUARD(deleteFileGuard, [&fileData]()
  {
    for (auto const & path : fileData)
      TEST(base::DeleteFileX(path), ());
  });

  for (auto const & path : fileData)
  {
    FileWriter sourceFile(path);
    sourceFile.Write(path.c_str(), path.size());
  }

  for (auto level : GetCompressionLevels())
    CreateAndTestZip(fileData, "testzip.zip", level);
}
// Like CreateZip_MultipleFiles, but two of the three entries are stored inside
// folders ("f2/", "f3/") in the archive.
UNIT_TEST(CreateZip_MultipleFilesWithFolders)
{
  std::vector<std::string> const fileData{"testf1", "testfile2", "testfile3_longname.txt.xml.csv"};
  std::vector<std::string> const fileInArchiveData{"testf1", "f2/testfile2", "f3/testfile3_longname.txt.xml.csv"};
  SCOPE_GUARD(deleteFileGuard, [&fileData]()
  {
    for (auto const & path : fileData)
      TEST(base::DeleteFileX(path), ());
  });

  for (auto const & path : fileData)
  {
    FileWriter sourceFile(path);
    sourceFile.Write(path.c_str(), path.size());
  }

  for (auto level : GetCompressionLevels())
    CreateAndTestZipWithFolder(fileData, fileInArchiveData, "testzip.zip", level);
}
// Multi-file code path with a single, empty source file, once per compression level.
UNIT_TEST(CreateZip_MultipleFilesSingleEmpty)
{
  std::vector<std::string> const fileData{"singleEmptyfile.txt"};
  SCOPE_GUARD(deleteFileGuard, [&fileData]() { TEST(base::DeleteFileX(fileData[0]), ()); });

  // Create the file without writing anything to it.
  {
    FileWriter emptyFile(fileData[0]);
  }

  for (auto level : GetCompressionLevels())
    CreateAndTestZip(fileData, "testzip.zip", level);
}

View file

@ -0,0 +1,224 @@
#include "testing/testing.hpp"
#include "coding/file_writer.hpp"
#include "coding/zip_reader.hpp"
#include "base/logging.hpp"
#include "base/macros.hpp"
#include <exception>
#include <string>
using namespace std;
// Raw bytes of a small zip archive used by ZipReaderSmoke; the test expects it to
// contain an entry "test.txt" with the content "Test\n". The array carries an extra
// implicit terminating '\0', which the test excludes when writing the file.
static char const zipBytes[] =
"PK\003\004\n\0\0\0\0\0\222\226\342>\302\032"
"x\372\005\0\0\0\005\0\0\0\b\0\034\0te"
"st.txtUT\t\0\003\303>\017N\017"
"?\017Nux\v\0\001\004\365\001\0\0\004P\0"
"\0\0Test\nPK\001\002\036\003\n\0\0"
"\0\0\0\222\226\342>\302\032x\372\005\0\0\0\005"
"\0\0\0\b\0\030\0\0\0\0\0\0\0\0\0\244"
"\201\0\0\0\0test.txtUT\005"
"\0\003\303>\017Nux\v\0\001\004\365\001\0\0"
"\004P\0\0\0PK\005\006\0\0\0\0\001\0\001"
"\0N\0\0\0G\0\0\0\0\0";
// Writes zipBytes to disk and checks three scenarios: reading an existing entry
// succeeds, opening a non-existing archive throws, and opening a non-existing entry
// inside a valid archive throws.
UNIT_TEST(ZipReaderSmoke)
{
  string const zipPath = "smoke_test.zip";
  {
    FileWriter archive(zipPath);
    // Drop the implicit terminating '\0' of the literal.
    archive.Write(zipBytes, ARRAY_SIZE(zipBytes) - 1);
  }

  // Valid archive, valid entry: no exception and the expected content.
  bool noException = true;
  try
  {
    ZipFileReader entryReader(zipPath, "test.txt");
    string content;
    entryReader.ReadAsString(content);
    TEST_EQUAL(content, "Test\n", ("Invalid zip file contents"));
  }
  catch (exception const & e)
  {
    noException = false;
    LOG(LERROR, (e.what()));
  }
  TEST(noException, ("Unhandled exception"));

  // invalid zip
  noException = true;
  try
  {
    ZipFileReader entryReader("some_nonexisting_filename", "test.txt");
  }
  catch (exception const &)
  {
    noException = false;
  }
  TEST(!noException, ());

  // invalid file inside zip
  noException = true;
  try
  {
    ZipFileReader entryReader(zipPath, "test");
  }
  catch (exception const &)
  {
    noException = false;
  }
  TEST(!noException, ());

  FileWriter::DeleteFileX(zipPath);
}
/// zip file with 3 files inside: 1.txt, 2.txt, 3.ttt
static char const zipBytes2[] =
"\x50\x4b\x3\x4\xa\x0\x0\x0\x0\x0\x92\x6b\xf6\x3e\x53\xfc\x51\x67\x2\x0\x0"
"\x0\x2\x0\x0\x0\x5\x0\x1c\x0\x31\x2e\x74\x78\x74\x55\x54\x9\x0\x3\xd3\x50\x29\x4e\xd4\x50\x29\x4e\x75\x78"
"\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0\x0\x0\x31\xa\x50\x4b\x3\x4\xa\x0\x0\x0\x0\x0\x95\x6b\xf6\x3e\x90\xaf"
"\x7c\x4c\x2\x0\x0\x0\x2\x0\x0\x0\x5\x0\x1c\x0\x32\x2e\x74\x78\x74\x55\x54\x9\x0\x3\xd9\x50\x29\x4e\xd9\x50"
"\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0\x0\x0\x32\xa\x50\x4b\x3\x4\xa\x0\x0\x0\x0\x0\x9c\x6b"
"\xf6\x3e\xd1\x9e\x67\x55\x2\x0\x0\x0\x2\x0\x0\x0\x5\x0\x1c\x0\x33\x2e\x74\x74\x74\x55\x54\x9\x0\x3\xe8\x50"
"\x29\x4e\xe9\x50\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0\x0\x0\x33\xa\x50\x4b\x1\x2\x1e\x3\xa"
"\x0\x0\x0\x0\x0\x92\x6b\xf6\x3e\x53\xfc\x51\x67\x2\x0\x0\x0\x2\x0\x0\x0\x5\x0\x18\x0\x0\x0\x0\x0\x1\x0\x0"
"\x0\xa4\x81\x0\x0\x0\x0\x31\x2e\x74\x78\x74\x55\x54\x5\x0\x3\xd3\x50\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0"
"\x0\x4\x14\x0\x0\x0\x50\x4b\x1\x2\x1e\x3\xa\x0\x0\x0\x0\x0\x95\x6b\xf6\x3e\x90\xaf\x7c\x4c\x2\x0\x0\x0\x2"
"\x0\x0\x0\x5\x0\x18\x0\x0\x0\x0\x0\x1\x0\x0\x0\xa4\x81\x41\x0\x0\x0\x32\x2e\x74\x78\x74\x55\x54\x5\x0\x3"
"\xd9\x50\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0\x0\x0\x50\x4b\x1\x2\x1e\x3\xa\x0\x0\x0\x0\x0"
"\x9c\x6b\xf6\x3e\xd1\x9e\x67\x55\x2\x0\x0\x0\x2\x0\x0\x0\x5\x0\x18\x0\x0\x0\x0\x0\x1\x0\x0\x0\xa4\x81\x82"
"\x0\x0\x0\x33\x2e\x74\x74\x74\x55\x54\x5\x0\x3\xe8\x50\x29\x4e\x75\x78\xb\x0\x1\x4\xf5\x1\x0\x0\x4\x14\x0"
"\x0\x0\x50\x4b\x5\x6\x0\x0\x0\x0\x3\x0\x3\x0\xe1\x0\x0\x0\xc3\x0\x0\x0\x0\x0";
static char const invalidZip[] = "1234567890asdqwetwezxvcbdhg322353tgfsd";
// Checks ZipFileReader::IsZip and ZipFileReader::FilesList on a valid archive
// (zipBytes2) and on a file that is not a zip archive (invalidZip).
UNIT_TEST(ZipFilesList)
{
string const ZIPFILE = "list_test.zip";
{
FileWriter f(ZIPFILE);
// "- 1" skips the implicit trailing NUL of the string literal.
f.Write(zipBytes2, ARRAY_SIZE(zipBytes2) - 1);
}
TEST(ZipFileReader::IsZip(ZIPFILE), ());
string const ZIPFILE_INVALID = "invalid_test.zip";
{
FileWriter f(ZIPFILE_INVALID);
f.Write(invalidZip, ARRAY_SIZE(invalidZip) - 1);
}
TEST(!ZipFileReader::IsZip(ZIPFILE_INVALID), ());
// Listing the valid archive: three entries, each reported as (name, 2).
try
{
ZipFileReader::FileList files;
ZipFileReader::FilesList(ZIPFILE, files);
TEST_EQUAL(files.size(), 3, ());
TEST_EQUAL(files[0].first, "1.txt", ());
TEST_EQUAL(files[0].second, 2, ());
TEST_EQUAL(files[1].first, "2.txt", ());
TEST_EQUAL(files[1].second, 2, ());
TEST_EQUAL(files[2].first, "3.ttt", ());
TEST_EQUAL(files[2].second, 2, ());
}
catch (exception const & e)
{
TEST(false, ("Can't get list of files inside zip", e.what()));
}
// Listing the invalid archive is expected to throw.
try
{
ZipFileReader::FileList files;
ZipFileReader::FilesList(ZIPFILE_INVALID, files);
TEST(false, ("This test shouldn't be reached - exception should be thrown"));
}
catch (exception const &)
{}
// Remove both temporary files.
FileWriter::DeleteFileX(ZIPFILE_INVALID);
FileWriter::DeleteFileX(ZIPFILE);
}
/// Compressed zip file with 2 files in assets folder:
/// assets/aaaaaaaaaa.txt (contains text "aaaaaaaaaa\x0A")
/// assets/holalala.txt (contains text "Holalala\x0A")
static char const zipBytes3[] =
"\x50\x4B\x03\x04\x14\x00\x02\x00\x08\x00\xAF\x96\x56\x40\x42\xE5\x26\x8F\x06\x00"
"\x00\x00\x0B\x00\x00\x00\x15\x00\x1C\x00\x61\x73\x73\x65\x74\x73\x2F\x61\x61\x61"
"\x61\x61\x61\x61\x61\x61\x61\x2E\x74\x78\x74\x55\x54\x09\x00\x03\x7A\x0F\x45\x4F"
"\xD8\x0F\x45\x4F\x75\x78\x0B\x00\x01\x04\xF5\x01\x00\x00\x04\x14\x00\x00\x00\x4B"
"\x4C\x84\x01\x2E\x00\x50\x4B\x03\x04\x14\x00\x02\x00\x08\x00\xE6\x96\x56\x40\x5E"
"\x76\x90\x07\x08\x00\x00\x00\x09\x00\x00\x00\x13\x00\x1C\x00\x61\x73\x73\x65\x74"
"\x73\x2F\x68\x6F\x6C\x61\x6C\x61\x6C\x61\x2E\x74\x78\x74\x55\x54\x09\x00\x03\xDF"
"\x0F\x45\x4F\xDC\x0F\x45\x4F\x75\x78\x0B\x00\x01\x04\xF5\x01\x00\x00\x04\x14\x00"
"\x00\x00\xF3\xC8\xCF\x49\x04\x41\x2E\x00\x50\x4B\x01\x02\x1E\x03\x14\x00\x02\x00"
"\x08\x00\xAF\x96\x56\x40\x42\xE5\x26\x8F\x06\x00\x00\x00\x0B\x00\x00\x00\x15\x00"
"\x18\x00\x00\x00\x00\x00\x01\x00\x00\x00\xA4\x81\x00\x00\x00\x00\x61\x73\x73\x65"
"\x74\x73\x2F\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x2E\x74\x78\x74\x55\x54\x05"
"\x00\x03\x7A\x0F\x45\x4F\x75\x78\x0B\x00\x01\x04\xF5\x01\x00\x00\x04\x14\x00\x00"
"\x00\x50\x4B\x01\x02\x1E\x03\x14\x00\x02\x00\x08\x00\xE6\x96\x56\x40\x5E\x76\x90"
"\x07\x08\x00\x00\x00\x09\x00\x00\x00\x13\x00\x18\x00\x00\x00\x00\x00\x01\x00\x00"
"\x00\xA4\x81\x55\x00\x00\x00\x61\x73\x73\x65\x74\x73\x2F\x68\x6F\x6C\x61\x6C\x61"
"\x6C\x61\x2E\x74\x78\x74\x55\x54\x05\x00\x03\xDF\x0F\x45\x4F\x75\x78\x0B\x00\x01"
"\x04\xF5\x01\x00\x00\x04\x14\x00\x00\x00\x50\x4B\x05\x06\x00\x00\x00\x00\x02\x00"
"\x02\x00\xB4\x00\x00\x00\xAA\x00\x00\x00\x00\x00";
// Extracts both entries of zipBytes3, one after another, into the same output
// file and checks that the second extraction fully replaces the first one.
UNIT_TEST(ZipExtract)
{
  string const ZIPFILE = "test.zip";
  {
    FileWriter f(ZIPFILE);
    // "- 1" skips the implicit trailing NUL of the string literal: it is not
    // part of the archive and must not be appended after the end record.
    f.Write(zipBytes3, ARRAY_SIZE(zipBytes3) - 1);
  }
  TEST(ZipFileReader::IsZip(ZIPFILE), ("Not a zip file"));

  ZipFileReader::FileList files;
  ZipFileReader::FilesList(ZIPFILE, files);
  TEST_EQUAL(files.size(), 2, ());

  string const OUTFILE = "out.tmp";
  string s;
  ZipFileReader::UnzipFile(ZIPFILE, files[0].first, OUTFILE);
  {
    FileReader(OUTFILE).ReadAsString(s);
  }
  TEST_EQUAL(s, "aaaaaaaaaa\x0A", ());

  // OUTFILE should be rewritten correctly in the next lines
  ZipFileReader::UnzipFile(ZIPFILE, files[1].first, OUTFILE);
  {
    FileReader(OUTFILE).ReadAsString(s);
  }
  TEST_EQUAL(s, "Holalala\x0A", ());

  // Remove the temporary files.
  FileWriter::DeleteFileX(OUTFILE);
  FileWriter::DeleteFileX(ZIPFILE);
}
// Checks the compressed (Size) and uncompressed (UncompressedSize) sizes that
// ZipFileReader reports for both entries of zipBytes3.
UNIT_TEST(ZipFileSizes)
{
  string const ZIPFILE = "test.zip";
  {
    FileWriter f(ZIPFILE);
    // "- 1" skips the implicit trailing NUL of the string literal: it is not
    // part of the archive and must not be appended after the end record.
    f.Write(zipBytes3, ARRAY_SIZE(zipBytes3) - 1);
  }
  TEST(ZipFileReader::IsZip(ZIPFILE), ("Not a zip file"));

  ZipFileReader::FileList files;
  ZipFileReader::FilesList(ZIPFILE, files);
  TEST_EQUAL(files.size(), 2, ());

  {
    ZipFileReader file(ZIPFILE, files[0].first);
    TEST_EQUAL(file.Size(), 6, ());
    TEST_EQUAL(file.UncompressedSize(), 11, ());
  }
  {
    ZipFileReader file(ZIPFILE, files[1].first);
    TEST_EQUAL(file.Size(), 8, ());
    TEST_EQUAL(file.UncompressedSize(), 9, ());
  }

  // Remove the temporary archive.
  FileWriter::DeleteFileX(ZIPFILE);
}

View file

@ -0,0 +1,103 @@
#include "testing/testing.hpp"
#include "coding/zlib.hpp"
#include "base/macros.hpp"
#include "base/string_utils.hpp"
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace coding;
using namespace std;
using Deflate = ZLib::Deflate;
using Inflate = ZLib::Inflate;
// (compression format, decompression format) pairs that TestDeflateInflate
// round-trips: each Deflate format is paired with the same Inflate format and
// with Inflate::Format::Both.
pair<Deflate::Format, Inflate::Format> const g_combinations[] = {{Deflate::Format::ZLib, Inflate::Format::ZLib},
{Deflate::Format::ZLib, Inflate::Format::Both},
{Deflate::Format::GZip, Inflate::Format::GZip},
{Deflate::Format::GZip, Inflate::Format::Both}};
namespace
{
// Compresses |original| and decompresses the result for every format pair in
// g_combinations, checking that both steps succeed and that the round trip
// reproduces |original|.
void TestDeflateInflate(string const & original)
{
for (auto const & p : g_combinations)
{
Deflate const deflate(p.first /* format */, Deflate::Level::BestCompression);
Inflate const inflate(p.second /* format */);
string compressed;
TEST(deflate(original, back_inserter(compressed)), ());
string decompressed;
TEST(inflate(compressed, back_inserter(decompressed)), ());
TEST_EQUAL(original, decompressed, ());
}
}
// Checks that null input is rejected by both Deflate and Inflate, then
// round-trips an empty string and a short string.
UNIT_TEST(ZLib_Smoke)
{
Deflate const deflate(Deflate::Format::ZLib, Deflate::Level::BestCompression);
Inflate const inflate(Inflate::Format::ZLib);
{
string s;
// A null data pointer must be reported as a failure whatever size is passed.
TEST(!deflate(nullptr /* data */, 0 /* size */, back_inserter(s) /* out */), ());
TEST(!deflate(nullptr /* data */, 4 /* size */, back_inserter(s) /* out */), ());
TEST(!inflate(nullptr /* data */, 0 /* size */, back_inserter(s) /* out */), ());
TEST(!inflate(nullptr /* data */, 4 /* size */, back_inserter(s) /* out */), ());
}
TestDeflateInflate("");
TestDeflateInflate("Hello, World!");
}
// Round-trips a longer input: the concatenated string forms of 0..999.
UNIT_TEST(ZLib_Large)
{
  string text;
  size_t number = 0;
  while (number < 1000)
  {
    text += strings::to_string(number);
    ++number;
  }
  TestDeflateInflate(text);
}
// Checks that Inflate can decode a gzip stream that was not produced by Deflate.
UNIT_TEST(GZip_ForeignData)
{
// To get a similar array of bytes, type the following:
//
// echo -n 'Hello, World!' | gzip -c | od -t x1
//
// NOTE: the header below has the FNAME flag set and stores the name
// "sample.txt", so this particular array was evidently produced by
// compressing a file with that name rather than a pipe.
uint8_t const data[] = {0x1f, 0x8b, 0x08, 0x08, 0x6d, 0x55, 0x08, 0x59, 0x00, 0x03, 0x73, 0x61, 0x6d, 0x70, 0x6c,
0x65, 0x2e, 0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0xd7, 0x51, 0x08, 0xcf,
0x2f, 0xca, 0x49, 0x51, 0x04, 0x00, 0xd0, 0xc3, 0x4a, 0xec, 0x0d, 0x00, 0x00, 0x00};
string s;
Inflate const inflate(Inflate::Format::GZip);
TEST(inflate(data, ARRAY_SIZE(data), back_inserter(s)), ());
TEST_EQUAL(s, "Hello, World!", ());
}
// Checks that trailing bytes after a complete gzip stream make Inflate report
// a failure while the payload of the stream itself is still produced.
UNIT_TEST(GZip_ExtraDataInBuffer)
{
// Data from GZip_ForeignData + extra \n at the end of the buffer.
uint8_t const data[] = {0x1f, 0x8b, 0x08, 0x08, 0x6d, 0x55, 0x08, 0x59, 0x00, 0x03, 0x73, 0x61, 0x6d, 0x70, 0x6c,
0x65, 0x2e, 0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0xd7, 0x51, 0x08, 0xcf,
0x2f, 0xca, 0x49, 0x51, 0x04, 0x00, 0xd0, 0xc3, 0x4a, 0xec, 0x0d, 0x00, 0x00, 0x00, 0x0a};
string s;
Inflate const inflate(Inflate::Format::GZip);
// inflate should fail because there is unconsumed data at the end of the buffer.
TEST(!inflate(data, ARRAY_SIZE(data), back_inserter(s)), ());
// inflate should decompress everything but the last byte.
TEST_EQUAL(s, "Hello, World!", ());
}
} // namespace

View file

@ -0,0 +1,487 @@
#include "coding/compressed_bit_vector.hpp"
#include "coding/write_to_sink.hpp"
#include "base/assert.hpp"
#include <algorithm>
#include <bit>
namespace coding
{
using std::make_unique, std::max, std::min, std::unique_ptr, std::vector;
namespace
{
// Binary functor computing the intersection of two bit vectors; one overload
// per combination of storage strategies.
struct IntersectOp
{
  IntersectOp() {}

  // Dense & Dense: group-wise AND over the groups both operands have.
  unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::DenseCBV const & b) const
  {
    size_t const numGroups = min(a.NumBitGroups(), b.NumBitGroups());
    vector<uint64_t> groups;
    groups.reserve(numGroups);
    for (size_t idx = 0; idx < numGroups; ++idx)
      groups.push_back(a.GetBitGroup(idx) & b.GetBitGroup(idx));
    return coding::CompressedBitVectorBuilder::FromBitGroups(std::move(groups));
  }

  // The intersection of dense and sparse is always sparse.
  unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::SparseCBV const & b) const
  {
    // Keep the positions of |b| that are also set in |a|.
    vector<uint64_t> common;
    for (auto it = b.Begin(); it != b.End(); ++it)
    {
      if (a.GetBit(*it))
        common.push_back(*it);
    }
    return make_unique<coding::SparseCBV>(std::move(common));
  }

  // Sparse & Dense: intersection is symmetric, so reuse the overload above.
  unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::DenseCBV const & b) const
  {
    return (*this)(b, a);
  }

  // Sparse & Sparse: intersection of two sorted position lists.
  unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::SparseCBV const & b) const
  {
    vector<uint64_t> common;
    set_intersection(a.Begin(), a.End(), b.Begin(), b.End(), back_inserter(common));
    return make_unique<coding::SparseCBV>(std::move(common));
  }
};
// Binary functor computing the set difference a \ b of two bit vectors; one
// overload per combination of storage strategies.
struct SubtractOp
{
  SubtractOp() {}

  // Dense \ Dense: group-wise a & ~b.
  unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::DenseCBV const & b) const
  {
    // The result may have set bits wherever |a| has them, so it has to span
    // all groups of |a|. GetBitGroup() returns 0 past the end of |b|, which
    // leaves the groups of |a| beyond |b| untouched.
    vector<uint64_t> resGroups(a.NumBitGroups());
    for (size_t i = 0; i < resGroups.size(); ++i)
      resGroups[i] = a.GetBitGroup(i) & ~b.GetBitGroup(i);
    return CompressedBitVectorBuilder::FromBitGroups(std::move(resGroups));
  }

  // Dense \ Sparse: clears in each group of |a| the bits listed in |b|.
  unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::SparseCBV const & b) const
  {
    vector<uint64_t> resGroups(a.NumBitGroups());
    size_t i = 0;
    auto j = b.Begin();
    for (; i < resGroups.size() && j < b.End(); ++i)
    {
      uint64_t const kBitsBegin = i * DenseCBV::kBlockSize;
      uint64_t const kBitsEnd = (i + 1) * DenseCBV::kBlockSize;
      // Collect the bits of |b| that fall into the i-th group.
      uint64_t mask = 0;
      for (; j < b.End() && *j < kBitsEnd; ++j)
      {
        ASSERT_GREATER_OR_EQUAL(*j, kBitsBegin, ());
        mask |= static_cast<uint64_t>(1) << (*j - kBitsBegin);
      }
      resGroups[i] = a.GetBitGroup(i) & ~mask;
    }
    // |b| is exhausted: the remaining groups of |a| are copied unchanged.
    for (; i < resGroups.size(); ++i)
      resGroups[i] = a.GetBitGroup(i);
    return CompressedBitVectorBuilder::FromBitGroups(std::move(resGroups));
  }

  // Sparse \ Dense: keeps the positions of |a| that are not set in |b|.
  unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::DenseCBV const & b) const
  {
    vector<uint64_t> resPos;
    copy_if(a.Begin(), a.End(), back_inserter(resPos), [&](uint64_t bit) { return !b.GetBit(bit); });
    return CompressedBitVectorBuilder::FromBitPositions(std::move(resPos));
  }

  // Sparse \ Sparse: difference of two sorted position lists.
  unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::SparseCBV const & b) const
  {
    vector<uint64_t> resPos;
    set_difference(a.Begin(), a.End(), b.Begin(), b.End(), back_inserter(resPos));
    return CompressedBitVectorBuilder::FromBitPositions(std::move(resPos));
  }
};
// Binary functor computing the union of two bit vectors; one overload per
// combination of storage strategies.
struct UnionOp
{
  UnionOp() {}

  // Dense | Dense: group-wise OR; the tail of the longer operand is copied.
  unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::DenseCBV const & b) const
  {
    size_t const sizeA = a.NumBitGroups();
    size_t const sizeB = b.NumBitGroups();
    size_t commonSize = min(sizeA, sizeB);
    size_t resultSize = max(sizeA, sizeB);
    vector<uint64_t> resGroups(resultSize);
    for (size_t i = 0; i < commonSize; ++i)
      resGroups[i] = a.GetBitGroup(i) | b.GetBitGroup(i);
    if (a.NumBitGroups() == resultSize)
      for (size_t i = commonSize; i < resultSize; ++i)
        resGroups[i] = a.GetBitGroup(i);
    else
      for (size_t i = commonSize; i < resultSize; ++i)
        resGroups[i] = b.GetBitGroup(i);
    return CompressedBitVectorBuilder::FromBitGroups(std::move(resGroups));
  }

  // Dense | Sparse.
  unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a, coding::SparseCBV const & b) const
  {
    size_t const sizeA = a.NumBitGroups();
    // Number of groups needed to hold the highest bit of |b|: a bit at
    // position p lives in group p / kBlockSize, hence p / kBlockSize + 1
    // groups (this also covers p being an exact multiple of kBlockSize).
    size_t const sizeB =
        b.PopCount() == 0
            ? 0
            : static_cast<size_t>(b.Select(static_cast<size_t>(b.PopCount() - 1)) / DenseCBV::kBlockSize) + 1;
    if (sizeB > sizeA)
    {
      // |b| reaches beyond the groups of |a|: merge the two sorted position
      // sequences and let the builder choose the representation.
      vector<uint64_t> resPos;
      auto j = b.Begin();
      auto merge = [&](uint64_t va)
      {
        while (j < b.End() && *j < va)
        {
          resPos.push_back(*j);
          ++j;
        }
        // A bit that is set in both operands must be emitted only once.
        if (j < b.End() && *j == va)
          ++j;
        resPos.push_back(va);
      };
      a.ForEach(merge);
      for (; j < b.End(); ++j)
        resPos.push_back(*j);
      return CompressedBitVectorBuilder::FromBitPositions(std::move(resPos));
    }

    // Every bit of |b| fits into the groups of |a|: OR them in group by group.
    vector<uint64_t> resGroups(sizeA);
    auto j = b.Begin();
    for (size_t i = 0; i < sizeA; ++i)
    {
      uint64_t const kBitsBegin = i * DenseCBV::kBlockSize;
      uint64_t const kBitsEnd = (i + 1) * DenseCBV::kBlockSize;
      uint64_t mask = a.GetBitGroup(i);
      for (; j < b.End() && *j < kBitsEnd; ++j)
      {
        ASSERT_GREATER_OR_EQUAL(*j, kBitsBegin, ());
        mask |= static_cast<uint64_t>(1) << (*j - kBitsBegin);
      }
      resGroups[i] = mask;
    }
    return CompressedBitVectorBuilder::FromBitGroups(std::move(resGroups));
  }

  // Sparse | Dense: union is symmetric, so reuse the overload above.
  unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::DenseCBV const & b) const
  {
    return operator()(b, a);
  }

  // Sparse | Sparse: union of two sorted position lists.
  unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a, coding::SparseCBV const & b) const
  {
    vector<uint64_t> resPos;
    set_union(a.Begin(), a.End(), b.Begin(), b.End(), back_inserter(resPos));
    return CompressedBitVectorBuilder::FromBitPositions(std::move(resPos));
  }
};
// Dispatches |op| to the overload matching the concrete types of |lhs| and
// |rhs|, as reported by GetStorageStrategy(). Returns nullptr when either
// strategy is neither Dense nor Sparse.
template <typename TBinaryOp>
unique_ptr<coding::CompressedBitVector> Apply(TBinaryOp const & op, CompressedBitVector const & lhs,
                                              CompressedBitVector const & rhs)
{
  using Strategy = CompressedBitVector::StorageStrategy;

  auto const lhsStrategy = lhs.GetStorageStrategy();
  auto const rhsStrategy = rhs.GetStorageStrategy();
  bool const rhsDense = rhsStrategy == Strategy::Dense;
  bool const rhsSparse = rhsStrategy == Strategy::Sparse;

  if (lhsStrategy == Strategy::Dense)
  {
    auto const & denseLhs = static_cast<DenseCBV const &>(lhs);
    if (rhsDense)
      return op(denseLhs, static_cast<DenseCBV const &>(rhs));
    if (rhsSparse)
      return op(denseLhs, static_cast<SparseCBV const &>(rhs));
  }
  else if (lhsStrategy == Strategy::Sparse)
  {
    auto const & sparseLhs = static_cast<SparseCBV const &>(lhs);
    if (rhsDense)
      return op(sparseLhs, static_cast<DenseCBV const &>(rhs));
    if (rhsSparse)
      return op(sparseLhs, static_cast<SparseCBV const &>(rhs));
  }

  return nullptr;
}
// Decides whether a bit vector with |popCount| set bits out of |totalBits|
// should be stored as a DenseCBV. Only the overall ratio is considered: the
// distribution of the bits is ignored and the vector is never split into
// separately stored blocks (which might turn out to be a good idea).
bool DenseEnough(uint64_t popCount, uint64_t totalBits)
{
  // Threshold of 30% for now, evaluated in integer arithmetic:
  // popCount / totalBits >= 3 / 10.
  uint64_t const scaledPopCount = popCount * 10;
  uint64_t const scaledTotal = totalBits * 3;
  return !(scaledPopCount < scaledTotal);
}
// Builds a DenseCBV or a SparseCBV from the positions of set bits, choosing
// the representation with DenseEnough(). An empty input always yields a
// SparseCBV. |setBits| is forwarded to the constructor of the chosen class.
template <typename TBitPositions>
unique_ptr<CompressedBitVector> BuildFromBitPositions(TBitPositions && setBits)
{
  bool useDense = false;
  if (!setBits.empty())
  {
    uint64_t const highestBit = *max_element(setBits.begin(), setBits.end());
    useDense = DenseEnough(setBits.size(), highestBit);
  }

  if (useDense)
    return make_unique<DenseCBV>(std::forward<TBitPositions>(setBits));
  return make_unique<SparseCBV>(std::forward<TBitPositions>(setBits));
}
} // namespace
// static
// Out-of-class definition of the constexpr member that is declared (with its
// initializer) inside the class.
uint64_t const DenseCBV::kBlockSize;
// Builds the bitmap from the positions of set bits. The population count is
// taken from the size of |setBits|, i.e. the positions are presumably expected
// to be unique - TODO confirm against callers.
DenseCBV::DenseCBV(vector<uint64_t> const & setBits)
{
  if (setBits.empty())
    return;

  // Allocate enough groups to hold the highest position.
  auto const highest = max_element(setBits.begin(), setBits.end());
  m_bitGroups.resize(static_cast<size_t>(*highest / kBlockSize) + 1);
  m_popCount = static_cast<uint64_t>(setBits.size());

  for (uint64_t const pos : setBits)
  {
    size_t const groupIdx = static_cast<size_t>(pos / kBlockSize);
    uint64_t const bitMask = static_cast<uint64_t>(1) << (pos % kBlockSize);
    m_bitGroups[groupIdx] |= bitMask;
  }
}
// static
// Wraps ready-made 64-bit groups into a DenseCBV, taking ownership of
// |bitGroups| and computing the population count from them.
unique_ptr<DenseCBV> DenseCBV::BuildFromBitGroups(vector<uint64_t> && bitGroups)
{
  auto result = make_unique<DenseCBV>();

  uint64_t setBitsTotal = 0;
  for (uint64_t const group : bitGroups)
    setBitsTotal += std::popcount(group);

  result->m_popCount = setBitsTotal;
  result->m_bitGroups = std::move(bitGroups);
  return result;
}
// Returns the i-th 64-bit group, or 0 when |i| is past the last stored group.
uint64_t DenseCBV::GetBitGroup(size_t i) const
{
  if (i >= m_bitGroups.size())
    return 0;
  return m_bitGroups[i];
}
// Returns the cached number of set bits.
uint64_t DenseCBV::PopCount() const
{
return m_popCount;
}
bool DenseCBV::GetBit(uint64_t pos) const
{
uint64_t bitGroup = GetBitGroup(static_cast<size_t>(pos / kBlockSize));
return ((bitGroup >> (pos % kBlockSize)) & 1) > 0;
}
// Returns a bit vector that keeps only the first min(PopCount(), n) set bits
// (in increasing position order). The representation of the result is chosen
// by CompressedBitVectorBuilder::FromBitGroups unless a plain Clone() suffices.
unique_ptr<CompressedBitVector> DenseCBV::LeaveFirstSetNBits(uint64_t n) const
{
if (PopCount() <= n)
return Clone();
vector<uint64_t> groups;
// Copy whole groups while they fit into the remaining budget |n|.
for (size_t i = 0; i < m_bitGroups.size() && n != 0; ++i)
{
uint64_t group = m_bitGroups[i];
uint32_t const bits = std::popcount(group);
if (bits <= n)
{
n -= bits;
groups.push_back(group);
}
else
{
// The group has more set bits than remain in the budget:
// move the |n| lowest set bits of |group| into |part|.
uint64_t part = 0;
while (n != 0)
{
// group & -group isolates the lowest set bit.
part = part | (group & -group);
// group & (group - 1) clears the lowest set bit.
group = group & (group - 1);
--n;
}
groups.push_back(part);
}
}
return CompressedBitVectorBuilder::FromBitGroups(std::move(groups));
}
// Identifies this representation as Dense.
CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const
{
return CompressedBitVector::StorageStrategy::Dense;
}
// Writes a one-byte header holding the storage strategy, followed by the
// vector of bit groups.
void DenseCBV::Serialize(Writer & writer) const
{
uint8_t header = static_cast<uint8_t>(GetStorageStrategy());
WriteToSink(writer, header);
rw::WriteVectorOfPOD(writer, m_bitGroups);
}
// Returns a copy of this bit vector.
unique_ptr<CompressedBitVector> DenseCBV::Clone() const
{
  // Own the new object from the start so that it is released if copying the
  // groups throws.
  auto cbv = make_unique<DenseCBV>();
  cbv->m_popCount = m_popCount;
  cbv->m_bitGroups = m_bitGroups;
  return cbv;
}
// Copies the positions of set bits; they must already be sorted in
// non-decreasing order (checked by the ASSERT).
SparseCBV::SparseCBV(vector<uint64_t> const & setBits) : m_positions(setBits)
{
ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
}
// Takes ownership of the positions of set bits; they must already be sorted
// in non-decreasing order (checked by the ASSERT).
SparseCBV::SparseCBV(vector<uint64_t> && setBits) : m_positions(std::move(setBits))
{
ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
}
// Returns the position stored at index |i|; |i| must be less than the number
// of stored positions (checked by the ASSERT).
uint64_t SparseCBV::Select(size_t i) const
{
ASSERT_LESS(i, m_positions.size(), ());
return m_positions[i];
}
// Returns the number of stored positions.
uint64_t SparseCBV::PopCount() const
{
return m_positions.size();
}
// Returns true iff |pos| is among the stored positions; uses binary search,
// which relies on the positions being sorted.
bool SparseCBV::GetBit(uint64_t pos) const
{
return binary_search(m_positions.begin(), m_positions.end(), pos);
}
// Returns a bit vector that keeps only the first min(PopCount(), n) stored
// positions. The representation of a truncated result is chosen by
// CompressedBitVectorBuilder::FromBitPositions.
unique_ptr<CompressedBitVector> SparseCBV::LeaveFirstSetNBits(uint64_t n) const
{
  if (PopCount() <= n)
    return Clone();

  auto const first = m_positions.begin();
  vector<uint64_t> head(first, first + static_cast<size_t>(n));
  return CompressedBitVectorBuilder::FromBitPositions(std::move(head));
}
// Identifies this representation as Sparse.
CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const
{
return CompressedBitVector::StorageStrategy::Sparse;
}
// Writes a one-byte header holding the storage strategy, followed by the
// vector of positions.
void SparseCBV::Serialize(Writer & writer) const
{
uint8_t header = static_cast<uint8_t>(GetStorageStrategy());
WriteToSink(writer, header);
rw::WriteVectorOfPOD(writer, m_positions);
}
// Returns a copy of this bit vector.
unique_ptr<CompressedBitVector> SparseCBV::Clone() const
{
  // Own the new object from the start so that it is released if copying the
  // positions throws.
  auto cbv = make_unique<SparseCBV>();
  cbv->m_positions = m_positions;
  return cbv;
}
// static
// Builds a bit vector from the positions of set bits; |setBits| is copied by
// the constructor of the chosen representation (see BuildFromBitPositions).
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitPositions(vector<uint64_t> const & setBits)
{
return BuildFromBitPositions(setBits);
}
// static
// Builds a bit vector from the positions of set bits, forwarding |setBits|
// as an rvalue to BuildFromBitPositions.
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitPositions(vector<uint64_t> && setBits)
{
return BuildFromBitPositions(std::move(setBits));
}
// static
// Builds a bit vector from 64-bit groups, choosing the representation with
// DenseEnough(). Trailing zero groups are dropped first; an all-zero input
// yields an empty SparseCBV.
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups(vector<uint64_t> && bitGroups)
{
  static uint64_t constexpr kBlockSize = DenseCBV::kBlockSize;

  while (!bitGroups.empty() && bitGroups.back() == 0)
    bitGroups.pop_back();
  if (bitGroups.empty())
    return make_unique<SparseCBV>(std::move(bitGroups));

  // The last group is non-zero here, so its highest set bit is well defined.
  uint64_t const maxBit = kBlockSize * (bitGroups.size() - 1) + bits::FloorLog(bitGroups.back());
  uint64_t popCount = 0;
  for (size_t i = 0; i < bitGroups.size(); ++i)
    popCount += std::popcount(bitGroups[i]);

  if (DenseEnough(popCount, maxBit))
    return DenseCBV::BuildFromBitGroups(std::move(bitGroups));

  // Sparse case: expand the groups into a sorted list of positions.
  vector<uint64_t> setBits;
  setBits.reserve(static_cast<size_t>(popCount));
  for (size_t i = 0; i < bitGroups.size(); ++i)
  {
    for (size_t j = 0; j < kBlockSize; ++j)
      if (((bitGroups[i] >> j) & 1) > 0)
        setBits.push_back(kBlockSize * i + j);
  }
  // Hand the positions over instead of copying them.
  return make_unique<SparseCBV>(std::move(setBits));
}
// Returns the name of the storage strategy ("Dense" or "Sparse") for logging.
std::string DebugPrint(CompressedBitVector::StorageStrategy strat)
{
switch (strat)
{
case CompressedBitVector::StorageStrategy::Dense: return "Dense";
case CompressedBitVector::StorageStrategy::Sparse: return "Sparse";
}
// Reached only if |strat| holds a value outside the enumeration.
UNREACHABLE();
}
// static
// Returns the intersection of |lhs| and |rhs|, dispatched on their storage
// strategies by Apply().
unique_ptr<CompressedBitVector> CompressedBitVector::Intersect(CompressedBitVector const & lhs,
                                                               CompressedBitVector const & rhs)
{
  return Apply(IntersectOp(), lhs, rhs);
}
// static
// Returns the bits of |lhs| that are not set in |rhs|, dispatched on their
// storage strategies by Apply().
unique_ptr<CompressedBitVector> CompressedBitVector::Subtract(CompressedBitVector const & lhs,
                                                              CompressedBitVector const & rhs)
{
  return Apply(SubtractOp(), lhs, rhs);
}
// static
// Returns the union of |lhs| and |rhs|, dispatched on their storage
// strategies by Apply().
unique_ptr<CompressedBitVector> CompressedBitVector::Union(CompressedBitVector const & lhs,
                                                           CompressedBitVector const & rhs)
{
  return Apply(UnionOp(), lhs, rhs);
}
// static
// Returns true when |cbv| is null or has no set bits.
bool CompressedBitVector::IsEmpty(unique_ptr<CompressedBitVector> const & cbv)
{
  if (cbv == nullptr)
    return true;
  return cbv->PopCount() == 0;
}
// static
// Returns true when |cbv| is null or has no set bits.
bool CompressedBitVector::IsEmpty(CompressedBitVector const * cbv)
{
  if (cbv == nullptr)
    return true;
  return cbv->PopCount() == 0;
}
} // namespace coding

View file

@ -0,0 +1,265 @@
#pragma once
#include "coding/read_write_utils.hpp"
#include "coding/reader.hpp"
#include "coding/writer.hpp"
#include "base/assert.hpp"
#include "base/control_flow.hpp"
#include "base/ref_counted.hpp"
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace coding
{
// Abstract read-only interface of a set of bit positions. The concrete
// representations are DenseCBV and SparseCBV (see StorageStrategy).
class CompressedBitVector : public base::RefCounted
{
public:
// Identifies the concrete representation. The numeric value is also written
// as the one-byte header by Serialize().
enum class StorageStrategy
{
Dense,
Sparse
};
virtual ~CompressedBitVector() = default;
// Intersects two bit vectors.
// todo(@pimenov) We expect the common use case to be as follows.
// A CBV is created in memory and several CBVs are read and intersected
// with it one by one. The in-memory CBV may initially contain a bit
// for every feature in an mwm and the intersected CBVs are read from
// the leaves of a search trie.
// Therefore an optimization of Intersect comes to mind: make a wrapper
// around TReader that will read a representation of a CBV from disk
// and intersect it bit by bit with the global in-memory CBV bypassing such
// routines as allocating memory and choosing strategy. They all can be called only
// once, namely in the end, when it is needed to pack the in-memory CBV into
// a suitable representation and pass it to the caller.
static std::unique_ptr<CompressedBitVector> Intersect(CompressedBitVector const & lhs,
CompressedBitVector const & rhs);
// Subtracts two bit vectors: the result has the bits of lhs that are not set in rhs.
static std::unique_ptr<CompressedBitVector> Subtract(CompressedBitVector const & lhs,
CompressedBitVector const & rhs);
// Unites two bit vectors.
static std::unique_ptr<CompressedBitVector> Union(CompressedBitVector const & lhs, CompressedBitVector const & rhs);
// Returns true if cbv is null or has no set bits.
static bool IsEmpty(std::unique_ptr<CompressedBitVector> const & cbv);
static bool IsEmpty(CompressedBitVector const * cbv);
// Returns the number of set bits (population count).
virtual uint64_t PopCount() const = 0;
// todo(@pimenov) How long will 32 bits be enough here?
// Would operator[] look better?
virtual bool GetBit(uint64_t pos) const = 0;
// Returns a subset of the current bit vector with first
// min(PopCount(), |n|) set bits.
virtual std::unique_ptr<CompressedBitVector> LeaveFirstSetNBits(uint64_t n) const = 0;
// Returns the strategy used when storing this bit vector.
virtual StorageStrategy GetStorageStrategy() const = 0;
// Writes the contents of a bit vector to writer.
// The first byte is always the header that defines the format.
// Currently the header is 0 or 1 for Dense and Sparse strategies respectively.
// It is easier to dispatch via virtual method calls and not bother
// with template TWriters here as we do in similar places in our code.
// This should not pose too much of a problem because commonly
// used writers are inherited from Writer anyway.
// todo(@pimenov). Think about rewriting Serialize and Deserialize to use the
// code in old_compressed_bit_vector.{c,h}pp.
virtual void Serialize(Writer & writer) const = 0;
// Copies a bit vector and returns a pointer to the copy.
virtual std::unique_ptr<CompressedBitVector> Clone() const = 0;
};
std::string DebugPrint(CompressedBitVector::StorageStrategy strat);
// Bitmap representation: the bit at position pos is stored in bit
// (pos % kBlockSize) of group (pos / kBlockSize).
class DenseCBV : public CompressedBitVector
{
public:
friend class CompressedBitVectorBuilder;
// Number of bits stored in one group.
static uint64_t constexpr kBlockSize = 64;
DenseCBV() = default;
// Builds a dense CBV from a list of positions of set bits.
explicit DenseCBV(std::vector<uint64_t> const & setBits);
// Not to be confused with the constructor: the semantics
// of the array of integers is completely different.
static std::unique_ptr<DenseCBV> BuildFromBitGroups(std::vector<uint64_t> && bitGroups);
// Returns the number of stored 64-bit groups.
size_t NumBitGroups() const { return m_bitGroups.size(); }
// Calls f with the 0-based position of every set bit, in increasing order.
// Enumeration stops as soon as the wrapped call yields base::ControlFlow::Break.
template <typename Fn>
void ForEach(Fn && f) const
{
base::ControlFlowWrapper<Fn> wrapper(std::forward<Fn>(f));
for (size_t i = 0; i < m_bitGroups.size(); ++i)
{
for (size_t j = 0; j < kBlockSize; ++j)
{
if (((m_bitGroups[i] >> j) & 1) > 0)
{
if (wrapper(kBlockSize * i + j) == base::ControlFlow::Break)
return;
}
}
}
}
// Returns 0 if the group number is too large to be contained in m_bitGroups.
uint64_t GetBitGroup(size_t i) const;
// CompressedBitVector overrides:
uint64_t PopCount() const override;
bool GetBit(uint64_t pos) const override;
std::unique_ptr<CompressedBitVector> LeaveFirstSetNBits(uint64_t n) const override;
StorageStrategy GetStorageStrategy() const override;
void Serialize(Writer & writer) const override;
std::unique_ptr<CompressedBitVector> Clone() const override;
private:
// The bitmap, kBlockSize bits per element.
std::vector<uint64_t> m_bitGroups;
// Cached number of set bits in m_bitGroups.
uint64_t m_popCount = 0;
};
// Representation that stores the sorted positions of the set bits explicitly.
class SparseCBV : public CompressedBitVector
{
public:
friend class CompressedBitVectorBuilder;
using TIterator = std::vector<uint64_t>::const_iterator;
SparseCBV() = default;
// Both constructors expect setBits to be sorted (asserted in the implementation).
explicit SparseCBV(std::vector<uint64_t> const & setBits);
explicit SparseCBV(std::vector<uint64_t> && setBits);
// Returns the position of the i'th set bit.
uint64_t Select(size_t i) const;
// Calls f with every stored position, in storage order.
// Enumeration stops as soon as the wrapped call yields base::ControlFlow::Break.
template <typename Fn>
void ForEach(Fn && f) const
{
base::ControlFlowWrapper<Fn> wrapper(std::forward<Fn>(f));
for (auto const & position : m_positions)
if (wrapper(position) == base::ControlFlow::Break)
return;
}
// CompressedBitVector overrides:
uint64_t PopCount() const override;
bool GetBit(uint64_t pos) const override;
std::unique_ptr<CompressedBitVector> LeaveFirstSetNBits(uint64_t n) const override;
StorageStrategy GetStorageStrategy() const override;
void Serialize(Writer & writer) const override;
std::unique_ptr<CompressedBitVector> Clone() const override;
// Iterators over the stored positions.
inline TIterator Begin() const { return m_positions.cbegin(); }
inline TIterator End() const { return m_positions.cend(); }
private:
// 0-based positions of the set bits.
std::vector<uint64_t> m_positions;
};
// Factory functions that create bit vectors from positions, from bit groups
// or from a serialized representation.
class CompressedBitVectorBuilder
{
public:
// Chooses a strategy to store the bit vector with bits from setBits set to one
// and returns a pointer to a class that fits best.
static std::unique_ptr<CompressedBitVector> FromBitPositions(std::vector<uint64_t> const & setBits);
static std::unique_ptr<CompressedBitVector> FromBitPositions(std::vector<uint64_t> && setBits);
// Chooses a strategy to store the bit vector with bits from a bitmap obtained
// by concatenating the elements of bitGroups.
// NOTE(review): check that the lvalue-reference overload has a definition;
// only the rvalue overload appears to be implemented - TODO confirm.
static std::unique_ptr<CompressedBitVector> FromBitGroups(std::vector<uint64_t> & bitGroups);
static std::unique_ptr<CompressedBitVector> FromBitGroups(std::vector<uint64_t> && bitGroups);
// Reads a bit vector from reader which must contain a valid
// bit vector representation (see CompressedBitVector::Serialize for the format).
template <typename TReader>
static std::unique_ptr<CompressedBitVector> DeserializeFromReader(TReader & reader)
{
ReaderSource<TReader> src(reader);
return DeserializeFromSource(src);
}
// Reads a bit vector from source which must contain a valid
// bit vector representation (see CompressedBitVector::Serialize for the format).
// Returns a null pointer if the header byte matches neither strategy.
template <typename TSource>
static std::unique_ptr<CompressedBitVector> DeserializeFromSource(TSource & src)
{
// The first byte selects the representation of the payload that follows.
uint8_t header = ReadPrimitiveFromSource<uint8_t>(src);
CompressedBitVector::StorageStrategy strat = static_cast<CompressedBitVector::StorageStrategy>(header);
switch (strat)
{
case CompressedBitVector::StorageStrategy::Dense:
{
std::vector<uint64_t> bitGroups;
rw::ReadVectorOfPOD(src, bitGroups);
return DenseCBV::BuildFromBitGroups(std::move(bitGroups));
}
case CompressedBitVector::StorageStrategy::Sparse:
{
std::vector<uint64_t> setBits;
rw::ReadVectorOfPOD(src, setBits);
return std::make_unique<SparseCBV>(std::move(setBits));
}
}
return std::unique_ptr<CompressedBitVector>();
}
};
// ForEach is generic and therefore cannot be virtual: a helper class is needed.
class CompressedBitVectorEnumerator
{
public:
// Executes f for each bit that is set to one using
// the bit's 0-based position as argument.
template <typename Fn>
static void ForEach(CompressedBitVector const & cbv, Fn && f)
{
CompressedBitVector::StorageStrategy strat = cbv.GetStorageStrategy();
switch (strat)
{
case CompressedBitVector::StorageStrategy::Dense:
{
DenseCBV const & denseCBV = static_cast<DenseCBV const &>(cbv);
denseCBV.ForEach(f);
return;
}
case CompressedBitVector::StorageStrategy::Sparse:
{
SparseCBV const & sparseCBV = static_cast<SparseCBV const &>(cbv);
sparseCBV.ForEach(f);
return;
}
}
}
};
// Computes a hash of a bit vector from the positions of its set bits.
class CompressedBitVectorHasher
{
public:
  // Polynomial hash with base kBase over (position + 1) of every set bit,
  // taken in enumeration order; arithmetic wraps around in uint64_t.
  static uint64_t Hash(CompressedBitVector const & cbv)
  {
    static constexpr uint64_t kBase = 127;

    uint64_t result = 0;
    auto mix = [&result](uint64_t bitPos) { result = result * kBase + bitPos + 1; };
    CompressedBitVectorEnumerator::ForEach(cbv, mix);
    return result;
  }
};
} // namespace coding

View file

@ -0,0 +1,4 @@
#pragma once
// Buffer size of 512 KiB; presumably used when reading files - TODO confirm against users.
static size_t const READ_FILE_BUFFER_SIZE = 512 * 1024;
// Buffer size of 64 KiB; presumably used by the zip reading/writing code - TODO confirm against users.
static unsigned int const ZIP_FILE_BUFFER_SIZE = 64 * 1024;

163
libs/coding/csv_reader.cpp Normal file
View file

@ -0,0 +1,163 @@
#include "coding/csv_reader.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
namespace coding
{
// Main constructor: takes ownership of the line source. When |hasHeader| is
// set, the first row is read immediately and kept as the header; if the source
// has no rows at all the header stays empty.
CSVReader::CSVReader(std::unique_ptr<ReaderInterface> reader, bool hasHeader, char delimiter)
  : m_reader(std::move(reader))
  , m_hasHeader(hasHeader)
  , m_delimiter(delimiter)
{
  if (HasHeader())
  {
    auto const firstRow = ReadRow();
    if (firstRow.has_value())
      m_header = *firstRow;
  }
}
// Reads rows from the file |filename| (opened through DefaultReader).
CSVReader::CSVReader(std::string const & filename, bool hasHeader, char delimiter)
  : CSVReader(std::make_unique<DefaultReader>(filename), hasHeader, delimiter)
{
}
// Reads rows from |stream|. Only a reference is kept, so the stream must
// outlive this CSVReader.
CSVReader::CSVReader(std::istream & stream, bool hasHeader, char delimiter)
  : CSVReader(std::make_unique<IstreamWrapper>(stream), hasHeader, delimiter)
{
}
// Reads rows from |reader|. Only a reference is kept, so the reader must
// outlive this CSVReader.
CSVReader::CSVReader(Reader const & reader, bool hasHeader, char delimiter)
  : CSVReader(std::make_unique<ReaderWrapper>(reader), hasHeader, delimiter)
{
}
// Returns the |hasHeader| flag this reader was constructed with.
bool CSVReader::HasHeader() const { return m_hasHeader; }
// Returns the cell delimiter this reader was constructed with.
char CSVReader::GetDelimiter() const { return m_delimiter; }
// Returns the header row captured at construction; it stays empty when the
// reader has no header or the input had no first line.
CSVReader::Row const & CSVReader::GetHeader() const { return m_header; }
// Reads every remaining row and returns them in input order.
CSVReader::Rows CSVReader::ReadAll()
{
  Rows rows;
  while (auto const row = ReadRow())
    rows.emplace_back(*row);
  return rows;
}
std::optional<CSVReader::Row> CSVReader::ReadRow()
{
auto const line = m_reader->ReadLine();
if (!line)
return {};
Row row;
strings::ParseCSVRow(*line, m_delimiter, row);
++m_currentLine;
return row;
}
// Number of lines consumed so far through ReadRow(), header included.
size_t CSVReader::GetCurrentLineNumber() const { return m_currentLine; }
// Keeps a reference to |stream|; the stream is not owned.
CSVReader::IstreamWrapper::IstreamWrapper(std::istream & stream)
  : m_stream(stream)
{
}
// Extracts one line with std::getline. Returns an empty optional when the
// extraction fails (e.g. at end of stream).
std::optional<std::string> CSVReader::IstreamWrapper::ReadLine()
{
  std::string line;
  if (!std::getline(m_stream, line))
    return std::nullopt;
  return line;
}
// Keeps a reference to |reader|; reading starts at the reader's position 0.
CSVReader::ReaderWrapper::ReaderWrapper(Reader const & reader)
  : m_reader(reader)
{
}
// Reads bytes one at a time from the current position up to and including
// the next '\n' (or up to the end of the reader). Returns the line without
// the trailing '\n', or an empty optional when nothing is left to read.
std::optional<std::string> CSVReader::ReaderWrapper::ReadLine()
{
  std::string text;
  for (char c = '\0'; m_pos < m_reader.Size() && c != '\n'; ++m_pos)
  {
    m_reader.Read(m_pos, &c, sizeof(c));
    text.push_back(c);
  }

  if (text.empty())
    return std::nullopt;

  // Drop the line terminator; the last line of the input may not have one.
  if (text.back() == '\n')
    text.pop_back();
  return text;
}
// Opens |filename| for reading. A failure to open is only logged; afterwards
// the stream is configured to throw on badbit.
CSVReader::DefaultReader::DefaultReader(std::string const & filename)
  : m_stream(filename)
{
  if (m_stream.fail())
    LOG(LERROR, ("Can't open file ", filename));

  m_stream.exceptions(std::ios::badbit);
}
std::optional<std::string> CSVReader::DefaultReader::ReadLine()
{
return IstreamWrapper(m_stream).ReadLine();
}
// A non-end iterator immediately reads its first row from |reader|; an end
// iterator holds no row.
CSVRunner::Iterator::Iterator(CSVReader & reader, bool isEnd)
  : m_reader(reader)
{
  if (isEnd)
    return;
  m_current = m_reader.ReadRow();
}
// Copies share the same underlying CSVReader and duplicate the current row.
CSVRunner::Iterator::Iterator(Iterator const & other)
  : m_reader(other.m_reader)
  , m_current(other.m_current)
{
}
// Pre-increment: replaces the current row with the next one from the reader
// (no row once the input is exhausted).
CSVRunner::Iterator & CSVRunner::Iterator::operator++()
{
  m_current = m_reader.ReadRow();
  return *this;
}
// Post-increment: advances this iterator and returns a copy of its previous
// state.
CSVRunner::Iterator CSVRunner::Iterator::operator++(int)
{
  Iterator previous(*this);
  ++(*this);
  return previous;
}
// Two iterators are equal when they refer to the same CSVReader object and
// either both hold a row or both hold none; row contents are not compared.
bool CSVRunner::Iterator::operator==(Iterator const & other) const
{
  if (&m_reader != &other.m_reader)
    return false;
  return m_current.has_value() == other.m_current.has_value();
}
// Negation of operator==.
bool CSVRunner::Iterator::operator!=(Iterator const & other) const
{
  bool const same = (*this == other);
  return !same;
}
// Returns the current row. The optional is dereferenced unchecked, so this
// must not be called on an iterator that holds no row.
CSVReader::Row & CSVRunner::Iterator::operator*() { return *m_current; }
// Takes over |reader|; the runner owns it from now on.
CSVRunner::CSVRunner(CSVReader && reader)
  : m_reader(std::move(reader))
{
}
// Returns an iterator positioned on the next unread row. Constructing it
// reads one row from the owned reader.
CSVRunner::Iterator CSVRunner::begin()
{
  Iterator first(m_reader);
  return first;
}
// Returns the past-the-end iterator; it reads nothing from the reader.
CSVRunner::Iterator CSVRunner::end()
{
  Iterator last(m_reader, true /* isEnd */);
  return last;
}
} // namespace coding

128
libs/coding/csv_reader.hpp Normal file
View file

@ -0,0 +1,128 @@
#pragma once
#include "coding/reader.hpp"

#include <cstddef>
#include <fstream>
#include <functional>
#include <iterator>
#include <memory>
#include <optional>
#include <string>
#include <vector>
namespace coding
{
// Line-oriented reader of delimiter-separated text. Input can come from a
// file name, a std::istream or a Reader; each line becomes a Row of cells.
class CSVReader
{
public:
  using Row = std::vector<std::string>;
  using Rows = std::vector<Row>;

  // When |hasHeader| is true the first line is consumed at construction and
  // exposed through GetHeader(). Stream and Reader arguments are held by
  // reference and must outlive the CSVReader.
  explicit CSVReader(std::string const & filename, bool hasHeader = false, char delimiter = ',');
  explicit CSVReader(std::istream & stream, bool hasHeader = false, char delimiter = ',');
  explicit CSVReader(Reader const & reader, bool hasHeader = false, char delimiter = ',');

  bool HasHeader() const;
  char GetDelimiter() const;
  Row const & GetHeader() const;

  // Returns the next row, or an empty optional when the input is exhausted.
  std::optional<Row> ReadRow();
  // Returns all rows that have not been read yet.
  Rows ReadAll();

  // Calls |fn| with every remaining row, in input order.
  template <typename Fn>
  void ForEachRow(Fn && fn)
  {
    for (;;)
    {
      auto const row = ReadRow();
      if (!row)
        return;
      fn(*row);
    }
  }

  // The total number of lines read including the header. Count starts at 0.
  size_t GetCurrentLineNumber() const;

private:
  // Source of raw text lines.
  class ReaderInterface
  {
  public:
    virtual ~ReaderInterface() = default;

    // Returns the next line, or an empty optional when there are no more.
    virtual std::optional<std::string> ReadLine() = 0;
  };

  // Line source backed by a caller-owned std::istream.
  class IstreamWrapper : public ReaderInterface
  {
  public:
    explicit IstreamWrapper(std::istream & stream);

    // ReaderInterface overrides:
    std::optional<std::string> ReadLine() override;

  private:
    std::istream & m_stream;
  };

  // Line source backed by a caller-owned Reader; tracks its own read position.
  class ReaderWrapper : public ReaderInterface
  {
  public:
    explicit ReaderWrapper(Reader const & reader);

    // ReaderInterface overrides:
    std::optional<std::string> ReadLine() override;

  private:
    size_t m_pos = 0;
    Reader const & m_reader;
  };

  // Line source that owns an std::ifstream opened from a file name.
  class DefaultReader : public ReaderInterface
  {
  public:
    explicit DefaultReader(std::string const & filename);

    // ReaderInterface overrides:
    std::optional<std::string> ReadLine() override;

  private:
    std::ifstream m_stream;
  };

  // Common constructor the public ones delegate to.
  explicit CSVReader(std::unique_ptr<ReaderInterface> reader, bool hasHeader, char delimiter);

  std::unique_ptr<ReaderInterface> m_reader;
  size_t m_currentLine = 0;
  bool m_hasHeader = false;
  char m_delimiter = ',';
  Row m_header;
};
// Adapter that owns a CSVReader and exposes its rows through begin()/end(),
// so the rows can be consumed with a range-based for loop.
class CSVRunner
{
public:
  explicit CSVRunner(CSVReader && reader);

  // Single-pass input iterator over the rows of a CSVReader.
  class Iterator
  {
  public:
    // All five member types are declared: since C++17 std::iterator_traits
    // is empty for a type that lacks any of them, which makes the iterator
    // unusable with standard algorithms that consult the traits.
    using iterator_category = std::input_iterator_tag;
    using value_type = CSVReader::Row;
    using difference_type = std::ptrdiff_t;
    using pointer = CSVReader::Row *;
    using reference = CSVReader::Row &;

    // Unless |isEnd| is true, construction reads one row from |reader|.
    explicit Iterator(CSVReader & reader, bool isEnd = false);
    Iterator(Iterator const & other);

    Iterator & operator++();
    Iterator operator++(int);

    // Checks whether both this and other are equal. Two CSVReader iterators are equal if both of
    // them are end-of-file iterators or not and both of them refer to the same CSVReader.
    bool operator==(Iterator const & other) const;
    bool operator!=(Iterator const & other) const;

    // Must not be called on an end-of-file iterator.
    CSVReader::Row & operator*();

  private:
    CSVReader & m_reader;
    std::optional<CSVReader::Row> m_current;
  };

  // Warning: It reads first line.
  Iterator begin();
  Iterator end();

private:
  CSVReader m_reader;
};
} // namespace coding

163
libs/coding/dd_vector.hpp Normal file
View file

@ -0,0 +1,163 @@
#pragma once
#include "coding/reader.hpp"
#include "base/assert.hpp"
#include "base/exception.hpp"
#include <type_traits>
#include <boost/iterator/iterator_facade.hpp>
// Disk-driven vector: a read-only random-access view over a reader that
// stores a packed array of T. Elements are fetched from the reader on access;
// only the reader and the element count are kept in memory.
template <typename T, class TReader, typename TSize = uint32_t>
class DDVector
{
public:
  typedef T value_type;
  typedef TSize size_type;
  typedef std::make_signed_t<size_type> difference_type;
  typedef TReader ReaderType;

  DECLARE_EXCEPTION(OpenException, RootException);

  DDVector() : m_Size(0) {}

  // Throws OpenException when the reader size is not a multiple of sizeof(T).
  explicit DDVector(TReader const & reader) : m_reader(reader) { InitSize(); }

  // Re-binds the vector to |reader|. Throws OpenException like the constructor.
  void Init(TReader const & reader)
  {
    m_reader = reader;
    InitSize();
  }

  size_type size() const { return m_Size; }

  // Reads element |i| from the reader and returns it by value.
  T const operator[](size_type i) const { return ReadPrimitiveFromPos<T>(m_reader, ByteOffset(i)); }

  // Random-access iterator that lazily reads (and caches) the element it
  // points to. In DEBUG builds it also carries the vector size so that
  // positions can be range-checked.
  class const_iterator
    : public boost::iterator_facade<const_iterator, value_type const, boost::random_access_traversal_tag,
                                    value_type const &, difference_type>
  {
  public:
#ifdef DEBUG
    const_iterator(ReaderType const * pReader, size_type i, size_type size)
      : m_pReader(pReader)
      , m_I(i)
      , m_bValueRead(false)
      , m_Size(size)
    {
      ASSERT(static_cast<difference_type>(m_Size) >= 0, ());
    }
#else
    const_iterator(ReaderType const * pReader, size_type i) : m_pReader(pReader), m_I(i), m_bValueRead(false) {}
#endif

    T const & dereference() const
    {
      ASSERT_LESS(m_I, m_Size, (m_bValueRead));
      if (!m_bValueRead)
      {
        m_Value = ReadPrimitiveFromPos<T>(*m_pReader, static_cast<uint64_t>(m_I) * sizeof(T));
        m_bValueRead = true;
      }
      return m_Value;
    }

    void advance(difference_type n)
    {
      ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
      m_I += n;
      ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
      m_bValueRead = false;
    }

    difference_type distance_to(const_iterator const & it) const
    {
      ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
      ASSERT_LESS_OR_EQUAL(it.m_I, it.m_Size, (it.m_bValueRead));
      ASSERT_EQUAL(m_Size, it.m_Size, (m_I, it.m_I, m_bValueRead, it.m_bValueRead));
      ASSERT(m_pReader == it.m_pReader, (m_I, m_Size, it.m_I, it.m_Size));
      return (static_cast<difference_type>(it.m_I) - static_cast<difference_type>(m_I));
    }

    void increment()
    {
      ++m_I;
      m_bValueRead = false;
      ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
    }

    void decrement()
    {
      --m_I;
      m_bValueRead = false;
      ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
    }

    bool equal(const_iterator const & it) const
    {
      ASSERT_LESS_OR_EQUAL(m_I, m_Size, (m_bValueRead));
      ASSERT_LESS_OR_EQUAL(it.m_I, it.m_Size, (it.m_bValueRead));
      ASSERT_EQUAL(m_Size, it.m_Size, (m_I, it.m_I, m_bValueRead, it.m_bValueRead));
      ASSERT(m_pReader == it.m_pReader, (m_I, m_Size, it.m_I, it.m_Size));
      return m_I == it.m_I;
    }

  private:
    ReaderType const * m_pReader;
    size_type m_I;
    // Cache of the element at m_I; valid only while m_bValueRead is true.
    mutable T m_Value = {};
    mutable bool m_bValueRead;
#ifdef DEBUG
    size_type m_Size;
#endif
  };

  const_iterator begin() const
  {
#ifdef DEBUG
    return const_iterator(&m_reader, 0, m_Size);
#else
    return const_iterator(&m_reader, 0);
#endif
  }

  const_iterator end() const
  {
#ifdef DEBUG
    return const_iterator(&m_reader, m_Size, m_Size);
#else
    return const_iterator(&m_reader, m_Size);
#endif
  }

  // Reads element |i| into |result|.
  void Read(size_type i, T & result) const
  {
    ASSERT_LESS(i, m_Size, ());
    ReadFromPos(m_reader, ByteOffset(i), &result, sizeof(T));
  }

  // Reads |count| consecutive elements starting at |i| into |result|.
  // The range [i, i + count) may end exactly at size().
  void Read(size_type i, T * result, size_t count) const
  {
    ASSERT_LESS_OR_EQUAL(static_cast<uint64_t>(i) + count, m_Size, (i, count));
    ReadFromPos(m_reader, ByteOffset(i), result, count * sizeof(T));
  }

private:
  // Byte position of element |i|, computed in 64 bits so that it cannot
  // overflow a narrower size_type or a 32-bit size_t.
  static uint64_t ByteOffset(size_type i) { return static_cast<uint64_t>(i) * sizeof(T); }

  void InitSize()
  {
    uint64_t const sz = m_reader.Size();
    if ((sz % sizeof(T)) != 0)
      MYTHROW(OpenException, ("Element size", sizeof(T), "does not divide total size", sz));

    m_Size = static_cast<size_type>(sz / sizeof(T));
  }

  // TODO: Refactor me to use Reader by pointer.
  ReaderType m_reader;
  size_type m_Size;
};

210
libs/coding/diff.hpp Normal file
View file

@ -0,0 +1,210 @@
#pragma once
#include "base/assert.hpp"
#include "base/base.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>
namespace diff
{
// Kinds of patch operations. The numeric values are significant:
// PatchCoder::Op computes (lastOperation + 1) % 3 to tell which of the two
// other operations follows the previous one.
enum Operation
{
OPERATION_COPY = 0,
OPERATION_DELETE = 1,
OPERATION_INSERT = 2,
};
// Turns a sequence of Copy/Delete/Insert calls into op-codes written to a
// patch writer. Consecutive calls of the same kind are merged into a single
// op-code. An op-code stores the length in its upper bits; the lowest bit is
// 0 when the operation is the cyclic successor (COPY -> DELETE -> INSERT ->
// COPY) of the previous one and 1 otherwise.
template <class PatchWriterT, typename SizeT = uint64_t>
class PatchCoder
{
public:
  using size_type = SizeT;

  explicit PatchCoder(PatchWriterT & patchWriter)
    : m_LastOperation(OPERATION_COPY), m_LastOpCode(0), m_PatchWriter(patchWriter)
  {
  }

  // Records the removal of |n| source items; a zero length is ignored.
  void Delete(size_type n)
  {
    if (n == 0)
      return;
    Op(OPERATION_DELETE, n);
  }

  // Records that |n| items are taken unchanged; a zero length is ignored.
  void Copy(size_type n)
  {
    if (n == 0)
      return;
    Op(OPERATION_COPY, n);
  }

  // Records the insertion of |n| items starting at |it| and hands the data
  // to the patch writer; a zero length is ignored.
  template <typename TIter>
  void Insert(TIter it, size_type n)
  {
    if (n == 0)
      return;
    Op(OPERATION_INSERT, n);
    m_PatchWriter.WriteData(it, n);
  }

  // Flushes the pending op-code, if any.
  void Finalize() { WriteLastOp(); }

private:
  void Op(Operation op, size_type n)
  {
    if (op != m_LastOperation)
    {
      WriteLastOp();
      bool const isCyclicSuccessor = (m_LastOperation + 1) % 3 == op;
      m_LastOpCode = (n << 1) | (isCyclicSuccessor ? 0 : 1);
      m_LastOperation = op;
    }
    else
    {
      // Same kind as the pending operation: just extend its length.
      m_LastOpCode += (n << 1);
    }
  }

  void WriteLastOp()
  {
    if (m_LastOpCode == 0)
    {
      CHECK_EQUAL(m_LastOperation, OPERATION_COPY, ());  // "We were just initialized."
      return;
    }
    m_PatchWriter.WriteOperation(m_LastOpCode);
  }

  Operation m_LastOperation;
  size_type m_LastOpCode;
  PatchWriterT & m_PatchWriter;
};
// Searches for the shortest edit script that turns A into B, giving up once
// more than maxPatchSize edits would be needed.
// Returns the number of edits of the shortest script, or -1 when none exists
// within the limit.
// After every completed round d the diagonals [-d, d] of the furthest-reach
// table are appended to tmpSink, so the script itself can be restored later.
template <typename TSignedWord,  // Signed word, capable of storing position in text.
          class TSrcVector,      // Source data (A).
          class TDstVector,      // Destination data (B).
          class TTmpFileSink     // Sink to store temporary information.
          >
TSignedWord DiffMyersSimple(TSrcVector const & A, TDstVector const & B, TSignedWord maxPatchSize,
                            TTmpFileSink & tmpSink)
{
  ASSERT_GREATER(maxPatchSize, 0, ());

  // furthest[maxPatchSize + k] is the furthest position in A reached on diagonal k.
  std::vector<TSignedWord> furthest(2 * maxPatchSize + 1);
  auto const sizeA = static_cast<TSignedWord>(A.size());
  auto const sizeB = static_cast<TSignedWord>(B.size());

  for (TSignedWord d = 0; d <= maxPatchSize; ++d)
  {
    for (TSignedWord k = -d; k <= d; k += 2)
    {
      bool const stepFromUpperDiagonal =
          k == -d || (k != d && furthest[maxPatchSize + k - 1] < furthest[maxPatchSize + k + 1]);

      TSignedWord x =
          stepFromUpperDiagonal ? furthest[maxPatchSize + k + 1] : furthest[maxPatchSize + k - 1] + 1;

      // Follow the run of equal items along the diagonal.
      while (x < sizeA && x - k < sizeB && A[x] == B[x - k])
        ++x;

      furthest[maxPatchSize + k] = x;
      if (x == sizeA && x - k == sizeB)
        return d;
    }
    tmpSink.Write(&furthest[maxPatchSize - d], (2 * d + 1) * sizeof(TSignedWord));
  }
  return -1;
}
// Trivial differ: keeps the common prefix and the common suffix of the two
// sequences and replaces everything in between (delete old, insert new).
class SimpleReplaceDiffer
{
public:
  // Emits, in this order: Copy(prefix), Delete(old middle), Insert(new
  // middle), Copy(suffix). Every call is made even when its length is zero.
  template <typename SrcIterT, typename DstIterT, class PatchCoderT>
  void Diff(SrcIterT srcBeg, SrcIterT srcEnd, DstIterT dstBeg, DstIterT dstEnd, PatchCoderT & patchCoder)
  {
    using Size = typename PatchCoderT::size_type;

    Size commonPrefix = 0;
    while (srcBeg != srcEnd && dstBeg != dstEnd && *srcBeg == *dstBeg)
    {
      ++srcBeg;
      ++dstBeg;
      ++commonPrefix;
    }
    patchCoder.Copy(commonPrefix);

    // The suffix is searched only in what is left after the prefix, so the
    // two never overlap.
    Size commonSuffix = 0;
    while (srcBeg != srcEnd && dstBeg != dstEnd && *(srcEnd - 1) == *(dstEnd - 1))
    {
      --srcEnd;
      --dstEnd;
      ++commonSuffix;
    }

    patchCoder.Delete(srcEnd - srcBeg);
    patchCoder.Insert(dstBeg, dstEnd - dstBeg);
    patchCoder.Copy(commonSuffix);
  }
};
// Given FineGrainedDiff and rolling Hasher, DiffWithRollingHash splits the source sequence
// into chunks of size m_BlockSize, finds equal chunks in the destination sequence, using rolling
// hash to find good candidates, writes info about equal chunks into patchCoder and for everything
// between equal chunks, calls FineGrainedDiff::Diff().
template <class FineGrainedDiffT, class HasherT,
class HashPosMultiMapT = std::unordered_multimap<typename HasherT::hash_type, uint64_t>>
class RollingHashDiffer
{
public:
explicit RollingHashDiffer(size_t blockSize, FineGrainedDiffT const & fineGrainedDiff = FineGrainedDiffT())
: m_FineGrainedDiff(fineGrainedDiff)
, m_BlockSize(blockSize)
{}
// Writes into |patchCoder| a patch that turns [srcBeg, srcEnd) into [dstBeg, dstEnd).
// The iterators are used with +, - and +=, i.e. they must support random access.
template <typename SrcIterT, typename DstIterT, class PatchCoderT>
void Diff(SrcIterT const srcBeg, SrcIterT const srcEnd, DstIterT const dstBeg, DstIterT const dstEnd,
PatchCoderT & patchCoder)
{
// Either sequence is shorter than one block: no block can match, use the fine-grained diff only.
if (srcEnd - srcBeg < static_cast<decltype(srcEnd - srcBeg)>(m_BlockSize) ||
dstEnd - dstBeg < static_cast<decltype(dstEnd - dstBeg)>(m_BlockSize))
{
m_FineGrainedDiff.Diff(srcBeg, srcEnd, dstBeg, dstEnd, patchCoder);
return;
}
HasherT hasher;
// Hash of every full, non-overlapping source block -> offset of that block from srcBeg.
HashPosMultiMapT srcHashes;
for (SrcIterT src = srcBeg; srcEnd - src >= static_cast<decltype(srcEnd - src)>(m_BlockSize); src += m_BlockSize)
srcHashes.insert(HashPosMultiMapValue(hasher.Init(src, m_BlockSize), src - srcBeg));
// srcLastDiff / dstLastDiff: start of the part that is not yet covered by the patch.
// [dst, dstNext) is the current destination window and h is its hash.
SrcIterT srcLastDiff = srcBeg;
DstIterT dst = dstBeg, dstNext = dstBeg + m_BlockSize, dstLastDiff = dstBeg;
hash_type h = hasher.Init(dst, m_BlockSize);
// NOTE(review): the loop stops as soon as the window touches dstEnd, so the very last
// window [dstEnd - m_BlockSize, dstEnd) is never looked up in srcHashes; it ends up in the
// fine-grained diff below instead. Confirm this is intended.
while (dstNext != dstEnd)
{
std::pair<HashPosMultiMapIterator, HashPosMultiMapIterator> iters = srcHashes.equal_range(h);
if (iters.first != iters.second)
{
pos_type const srcLastDiffPos = srcLastDiff - srcBeg;
// Among the blocks with this hash pick the one with the smallest offset that does not lie
// before the already processed source part.
HashPosMultiMapIterator it = srcHashes.end();
for (HashPosMultiMapIterator i = iters.first; i != iters.second; ++i)
if (i->second >= srcLastDiffPos && (it == srcHashes.end() || i->second < it->second))
it = i;
// Equal hashes are only a hint: compare the actual block contents.
if (it != srcHashes.end() && std::equal(srcBeg + it->second, srcBeg + it->second + m_BlockSize, dst))
{
pos_type srcBlockEqualPos = it->second;
// Diff whatever precedes the matched block on both sides, then copy the block itself.
m_FineGrainedDiff.Diff(srcLastDiff, srcBeg + srcBlockEqualPos, dstLastDiff, dst, patchCoder);
patchCoder.Copy(m_BlockSize);
srcLastDiff = srcBeg + srcBlockEqualPos + m_BlockSize;
dst = dstLastDiff = dstNext;
// Less than one block left in the destination: no further window can be formed.
if (dstEnd - dstNext < static_cast<decltype(dstEnd - dstNext)>(m_BlockSize))
break;
dstNext = dst + m_BlockSize;
h = hasher.Init(dst, m_BlockSize);
continue;
}
}
// No match at this position: slide the window by one item.
h = hasher.Scroll(*(dst++), *(dstNext++));
}
// Diff the tails that follow the last matched block.
if (srcLastDiff != srcEnd || dstLastDiff != dstEnd)
m_FineGrainedDiff.Diff(srcLastDiff, srcEnd, dstLastDiff, dstEnd, patchCoder);
}
private:
typedef typename HasherT::hash_type hash_type;
typedef typename HashPosMultiMapT::value_type::second_type pos_type;
typedef typename HashPosMultiMapT::const_iterator HashPosMultiMapIterator;
typedef typename HashPosMultiMapT::value_type HashPosMultiMapValue;
FineGrainedDiffT m_FineGrainedDiff;
// NOTE(review): not referenced anywhere in this class; Diff() creates its own local hasher.
HasherT m_Hasher;
size_t m_BlockSize;
};
} // namespace diff

View file

@ -0,0 +1,76 @@
#pragma once
#include "coding/bit_streams.hpp"
#include "base/assert.hpp"
#include "base/bits.hpp"
#include <cstdint>
namespace coding
{
// Gamma code for positive integers. Decode reads zero bits until the first
// one bit, which gives n = floor(log2(value)), and then the n bits that
// follow the most significant bit of the value.
class GammaCoder
{
public:
  // Writes |value| to |writer|. Zero cannot be encoded: in that case nothing
  // is written and false is returned.
  template <typename TWriter>
  static bool Encode(BitWriter<TWriter> & writer, uint64_t value)
  {
    if (value == 0)
      return false;

    uint8_t const log = bits::FloorLog(value);
    ASSERT_LESS_OR_EQUAL(log, 63, ());

    // Length prefix: a word with only bit |log| set, written as log + 1 bits.
    uint64_t const highestBit = static_cast<uint64_t>(1) << log;
    writer.WriteAtMost64Bits(highestBit, log + 1);
    // Payload: the |log| bits of the value below its most significant bit.
    writer.WriteAtMost64Bits(value, log);
    return true;
  }

  // Reads one value written by Encode.
  template <typename TReader>
  static uint64_t Decode(BitReader<TReader> & reader)
  {
    uint8_t log = 0;
    for (; reader.Read(1) == 0; ++log)
    {
    }
    ASSERT_LESS_OR_EQUAL(log, 63, ());

    uint64_t const highestBit = static_cast<uint64_t>(1) << log;
    return highestBit | reader.ReadAtMost64Bits(log);
  }
};
// Delta code for positive integers: the bit length of the value
// (floor(log2(value)) + 1) is stored with GammaCoder, followed by the bits of
// the value below its most significant bit.
class DeltaCoder
{
public:
  // Writes |value| to |writer|. Zero cannot be encoded: in that case nothing
  // is written and false is returned.
  template <typename TWriter>
  static bool Encode(BitWriter<TWriter> & writer, uint64_t value)
  {
    if (value == 0)
      return false;

    uint8_t const log = bits::FloorLog(value);
    ASSERT_LESS_OR_EQUAL(log, 63, ());

    bool const lengthWritten = GammaCoder::Encode(writer, log + 1);
    if (!lengthWritten)
      return false;

    writer.WriteAtMost64Bits(value, log);
    return true;
  }

  // Reads one value written by Encode.
  template <typename TReader>
  static uint64_t Decode(BitReader<TReader> & reader)
  {
    // The stored length is log + 1, so it is never zero for valid input.
    uint8_t log = GammaCoder::Decode(reader);
    ASSERT_GREATER(log, 0, ());
    --log;
    ASSERT_LESS_OR_EQUAL(log, 63, ());

    uint64_t const highestBit = static_cast<uint64_t>(1) << log;
    return highestBit | reader.ReadAtMost64Bits(log);
  }
};
} // namespace coding

View file

@ -0,0 +1,54 @@
#pragma once
#include "base/base.hpp"
#include <cstddef>
#include <type_traits>
// #define ENDIAN_IS_BIG
// @TODO(bykoianko) This method returns false since 05.12.2010. That means only little-endian
// architectures are supported. When it's necessary to support a big-endian system:
// * method IsBigEndianMacroBased() should be implemented based on IsLittleEndian() function
// * method SwapIfBigEndianMacroBased() should be implemented based on IsLittleEndian() function
// * all serialization and deserialization of rs_bit_vector and the other rank-select structures
// should be implemented taking endianness into account
// Compile-time endianness switch: true only when ENDIAN_IS_BIG is defined.
// No runtime detection is performed here.
inline bool IsBigEndianMacroBased()
{
#ifdef ENDIAN_IS_BIG
  bool const isBigEndian = true;
#else
  bool const isBigEndian = false;
#endif
  return isBigEndian;
}
// Returns |t| with the order of its bytes reversed. Integral types only.
template <typename T>
T ReverseByteOrder(T t)
{
  static_assert(std::is_integral<T>::value, "Only integral types are supported.");

  T reversed;
  // Walk the source from its last byte down while filling the result upwards.
  char const * from = reinterpret_cast<char const *>(&t) + sizeof(T);
  char * to = reinterpret_cast<char *>(&reversed);
  for (size_t left = sizeof(T); left != 0; --left)
    *to++ = *--from;
  return reversed;
}
// Returns |t| unchanged unless ENDIAN_IS_BIG is defined, in which case the
// bytes of |t| are reversed.
template <typename T>
T SwapIfBigEndianMacroBased(T t)
{
#ifndef ENDIAN_IS_BIG
  return t;
#else
  return ReverseByteOrder(t);
#endif
}
// Runtime check: true when the lowest-addressed byte of a 16-bit value holds
// its least significant bits.
inline bool IsLittleEndian()
{
  uint16_t const probe = 0x0001;
  return *reinterpret_cast<uint8_t const *>(&probe) != 0x0;
}

134
libs/coding/file_reader.cpp Normal file
View file

@ -0,0 +1,134 @@
#include "coding/file_reader.hpp"
#include "coding/internal/file_data.hpp"
#include "coding/reader_cache.hpp"
#include "base/logging.hpp"
// Statistics logging is off unless LOG_FILE_READER_STATS is defined to a non-zero value
// by the build.
#ifndef LOG_FILE_READER_STATS
#define LOG_FILE_READER_STATS 0
#endif // LOG_FILE_READER_STATS
// With statistics enabled, a log line is written whenever
// (read call count & LOG_FILE_READER_EVERY_N_READS_MASK) == 0.
#if LOG_FILE_READER_STATS && !defined(LOG_FILE_READER_EVERY_N_READS_MASK)
#define LOG_FILE_READER_EVERY_N_READS_MASK 0xFFFFFFFF
#endif
// static
uint32_t const FileReader::kDefaultLogPageSize = 10; // page size is 2^10 = 1024 = 1kb
// static
uint32_t const FileReader::kDefaultLogPageCount = 4; // page count is 2^4 = 16, i.e. 16 pages are cached
// State shared (through a shared_ptr) by a FileReader and the sub-readers
// created from it: the opened file and a page cache in front of it.
class FileReader::FileReaderData
{
public:
  FileReaderData(std::string const & fileName, uint32_t logPageSize, uint32_t logPageCount)
    : m_fileData(fileName), m_readerCache(logPageSize, logPageCount)
  {
#if LOG_FILE_READER_STATS
    m_readCallCount = 0;
#endif
  }

  ~FileReaderData()
  {
#if LOG_FILE_READER_STATS
    LOG(LINFO, ("FileReader", m_fileData.GetName(), m_readerCache.GetStatsStr()));
#endif
  }

  // Size of the whole file, as captured when it was opened.
  uint64_t Size() const { return m_fileData.Size(); }

  // Reads |size| bytes at absolute file position |pos| into |p| via the cache.
  void Read(uint64_t pos, void * p, size_t size)
  {
#if LOG_FILE_READER_STATS
    if (((++m_readCallCount) & LOG_FILE_READER_EVERY_N_READS_MASK) == 0)
      LOG(LINFO, ("FileReader", m_fileData.GetName(), m_readerCache.GetStatsStr()));
#endif

    m_readerCache.Read(m_fileData, pos, p, size);
  }

private:
  // FileData that asks for the file size once, at construction, and serves
  // the remembered value afterwards.
  class FileDataWithCachedSize : public base::FileData
  {
  public:
    explicit FileDataWithCachedSize(std::string const & fileName)
      : base::FileData(fileName, Op::READ), m_Size(FileData::Size())
    {
    }

    uint64_t Size() const { return m_Size; }

  private:
    uint64_t m_Size;
  };

  FileDataWithCachedSize m_fileData;
  ReaderCache<FileDataWithCachedSize, LOG_FILE_READER_STATS> m_readerCache;

#if LOG_FILE_READER_STATS
  uint32_t m_readCallCount;
#endif
};
// Opens |fileName| with the default cache geometry.
FileReader::FileReader(std::string const & fileName)
  : FileReader(fileName, kDefaultLogPageSize, kDefaultLogPageCount)
{
}
// Opens |fileName| and exposes the whole file. The cache holds
// 2^|logPageCount| pages of 2^|logPageSize| bytes each.
FileReader::FileReader(std::string const & fileName, uint32_t logPageSize, uint32_t logPageCount)
  : ModelReader(fileName)
  , m_logPageSize(logPageSize)
  , m_logPageCount(logPageCount)
  , m_fileData(std::make_shared<FileReaderData>(fileName, logPageSize, logPageCount))
  , m_offset(0)
  , m_size(m_fileData->Size())
{
}
// Sub-reader constructor: shares the file data of |reader| and exposes the
// window of |size| bytes that starts at absolute file position |offset|.
// No bounds are checked here; callers validate beforehand.
FileReader::FileReader(FileReader const & reader, uint64_t offset, uint64_t size, uint32_t logPageSize,
                       uint32_t logPageCount)
  : ModelReader(reader.GetName())
  , m_logPageSize(logPageSize)
  , m_logPageCount(logPageCount)
  , m_fileData(reader.m_fileData)
  , m_offset(offset)
  , m_size(size)
{
}
// Reads |size| bytes at position |pos| of this reader's window into |p|.
// Throws Reader::SizeException when the range does not fit.
void FileReader::Read(uint64_t pos, void * p, size_t size) const
{
  CheckPosAndSize(pos, size);

  uint64_t const filePos = m_offset + pos;
  m_fileData->Read(filePos, p, size);
}
// Returns a reader over [pos, pos + size) of this reader's window that
// shares the same file data and cache settings.
// Throws Reader::SizeException when the range does not fit.
FileReader FileReader::SubReader(uint64_t pos, uint64_t size) const
{
  CheckPosAndSize(pos, size);

  uint64_t const fileOffset = m_offset + pos;
  return FileReader(*this, fileOffset, size, m_logPageSize, m_logPageCount);
}
// Heap-allocated counterpart of SubReader().
// Throws Reader::SizeException when the range does not fit.
std::unique_ptr<Reader> FileReader::CreateSubReader(uint64_t pos, uint64_t size) const
{
  CheckPosAndSize(pos, size);

  // Can't use make_unique with private constructor.
  std::unique_ptr<Reader> subReader(new FileReader(*this, m_offset + pos, size, m_logPageSize, m_logPageCount));
  return subReader;
}
// Throws Reader::SizeException unless [pos, pos + size) lies inside this
// reader's window and the corresponding absolute range lies inside the file.
// All comparisons are done by subtraction: the sums pos + size and
// m_offset + pos + size may wrap around in uint64_t for huge arguments,
// which would let an out-of-bounds request slip through.
void FileReader::CheckPosAndSize(uint64_t pos, uint64_t size) const
{
  uint64_t const allSize1 = Size();
  bool const ret1 = (size <= allSize1 && pos <= allSize1 - size);
  if (!ret1)
    MYTHROW(Reader::SizeException, (pos, size, allSize1));

  // Safe to add now: pos + size <= allSize1 was established above.
  uint64_t const end = pos + size;
  uint64_t const allSize2 = m_fileData->Size();
  bool const ret2 = (end <= allSize2 && m_offset <= allSize2 - end);
  if (!ret2)
    MYTHROW(Reader::SizeException, (pos, size, allSize2));
}
// Replaces the window of this reader with |size| bytes starting at |offset|.
// The arguments are validated with CheckPosAndSize, i.e. against the current
// window. NOTE(review): |offset| is then stored as the absolute m_offset
// without adding the previous m_offset -- confirm callers only use this while
// the reader still covers the whole file.
void FileReader::SetOffsetAndSize(uint64_t offset, uint64_t size)
{
  CheckPosAndSize(offset, size);

  m_size = size;
  m_offset = offset;
}

View file

@ -0,0 +1,49 @@
#pragma once
#include "coding/reader.hpp"
#include "base/base.hpp"
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
// FileReader, cheap to copy, not thread safe.
// It is assumed that the file is not modified during the FileReader lifetime,
// because reads are cached and Size() is assumed to be constant.
class FileReader : public ModelReader
{
public:
  // Binary logarithms of the default cache page size and page count.
  static uint32_t const kDefaultLogPageSize;
  static uint32_t const kDefaultLogPageCount;

  explicit FileReader(std::string const & fileName);
  FileReader(std::string const & fileName, uint32_t logPageSize, uint32_t logPageCount);

  // Reader overrides:
  uint64_t Size() const override { return m_size; }
  void Read(uint64_t pos, void * p, size_t size) const override;
  std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const override;

  // Same as CreateSubReader, but returns the sub-reader by value.
  FileReader SubReader(uint64_t pos, uint64_t size) const;

  // Absolute position in the file at which this reader's window starts.
  uint64_t GetOffset() const { return m_offset; }

protected:
  // Used in special derived readers.
  void SetOffsetAndSize(uint64_t offset, uint64_t size);

private:
  class FileReaderData;

  // Creates a reader over a window of the file already opened by |reader|.
  FileReader(FileReader const & reader, uint64_t offset, uint64_t size, uint32_t logPageSize, uint32_t logPageCount);

  // Throws an exception if a (pos, size) read would result in an out-of-bounds access.
  void CheckPosAndSize(uint64_t pos, uint64_t size) const;

  uint32_t m_logPageSize;
  uint32_t m_logPageCount;
  // Shared with every copy and sub-reader of this reader.
  std::shared_ptr<FileReaderData> m_fileData;
  uint64_t m_offset;
  uint64_t m_size;
};

142
libs/coding/file_sort.hpp Normal file
View file

@ -0,0 +1,142 @@
#pragma once
#include "coding/file_reader.hpp"
#include "coding/file_writer.hpp"
#include "base/base.hpp"
#include "base/exception.hpp"
#include "base/logging.hpp"
#include <algorithm>
#include <cstdlib>
#include <exception>
#include <functional>
#include <memory>
#include <queue>
#include <string>
#include <utility>
#include <vector>
// Default in-memory sorting policy: std::sort with the stored comparator.
template <typename LessT>
struct Sorter
{
  LessT m_Less;

  Sorter(LessT less) : m_Less(less) {}

  // Sorts [first, last) in place.
  template <typename IterT>
  void operator()(IterT first, IterT last) const
  {
    std::sort(first, last, m_Less);
  }
};
// External sorter: items are collected in a memory buffer, every full buffer is sorted and
// appended to a temporary file as one run, and SortAndFinish() merges the runs through a
// priority queue into the output sink. Items are written to and read from the temporary
// file as raw bytes (sizeof(T) each).
template <typename T, // Item type.
class OutputSinkT = FileWriter, // Sink to output into result file.
typename LessT = std::less<T>, // Item comparator.
template <typename LessT1> class SorterT = Sorter // Item sorter.
>
class FileSorter
{
public:
// |bufferBytes| bounds the in-memory buffer; at least 16 items are always buffered.
// The temporary file |tmpFileName| is created right away.
FileSorter(size_t bufferBytes, std::string const & tmpFileName, OutputSinkT & outputSink, LessT fLess = LessT())
: m_TmpFileName(tmpFileName)
, m_BufferCapacity(std::max(size_t(16), bufferBytes / sizeof(T)))
, m_OutputSink(outputSink)
, m_ItemCount(0)
, m_Less(fLess)
{
m_Buffer.reserve(m_BufferCapacity);
m_pTmpWriter.reset(new FileWriter(tmpFileName));
}
// Adds one item; a full buffer is flushed to the temporary file first.
void Add(T const & item)
{
if (m_Buffer.size() == m_BufferCapacity)
FlushToTmpFile();
m_Buffer.push_back(item);
++m_ItemCount;
}
// Merges all runs into the output sink and deletes the temporary file.
// Must be called at most once: it releases the temporary writer.
void SortAndFinish()
{
ASSERT(m_pTmpWriter.get(), ());
FlushToTmpFile();
// Write output.
{
// Destroy the writer before the temporary file is opened for reading.
m_pTmpWriter.reset();
FileReader reader(m_TmpFileName);
ItemIndexPairGreater fGreater(m_Less);
PriorityQueue q(fGreater);
// Seed the queue with the first item of every run; runs start at multiples of
// m_BufferCapacity.
for (uint32_t i = 0; i < m_ItemCount; i += m_BufferCapacity)
Push(q, i, reader);
while (!q.empty())
{
m_OutputSink(q.top().first);
uint32_t const i = q.top().second + 1;
q.pop();
// Pull the successor from the same run unless the run (or the file) has ended.
if (i % m_BufferCapacity != 0 && i < m_ItemCount)
Push(q, i, reader);
}
}
FileWriter::DeleteFileX(m_TmpFileName);
}
// Finishes the sort if SortAndFinish() has not been called yet; exceptions are logged
// and not propagated.
~FileSorter()
{
if (m_pTmpWriter.get())
{
try
{
SortAndFinish();
}
catch (RootException const & e)
{
LOG(LERROR, (e.Msg()));
}
catch (std::exception const & e)
{
LOG(LERROR, (e.what()));
}
}
}
private:
// Orders (item, index) pairs so that the priority queue yields the smallest item first.
struct ItemIndexPairGreater
{
explicit ItemIndexPairGreater(LessT fLess) : m_Less(fLess) {}
inline bool operator()(std::pair<T, uint32_t> const & a, std::pair<T, uint32_t> const & b) const
{
return m_Less(b.first, a.first);
}
LessT m_Less;
};
using PriorityQueue =
std::priority_queue<std::pair<T, uint32_t>, std::vector<std::pair<T, uint32_t>>, ItemIndexPairGreater>;
// Sorts the buffered items and appends them to the temporary file as one run.
void FlushToTmpFile()
{
if (m_Buffer.empty())
return;
SorterT<LessT> sorter(m_Less);
sorter(m_Buffer.begin(), m_Buffer.end());
m_pTmpWriter->Write(&m_Buffer[0], m_Buffer.size() * sizeof(T));
m_Buffer.clear();
}
// Reads item number |i| from the temporary file and puts it into the queue.
void Push(PriorityQueue & q, uint32_t i, FileReader const & reader)
{
T item;
reader.Read(static_cast<uint64_t>(i) * sizeof(T), &item, sizeof(T));
q.push(std::pair<T, uint32_t>(item, i));
}
std::string const m_TmpFileName;
size_t const m_BufferCapacity;
OutputSinkT & m_OutputSink;
// Non-null until SortAndFinish() has run.
std::unique_ptr<FileWriter> m_pTmpWriter;
std::vector<T> m_Buffer;
// NOTE(review): item indices are 32-bit, so the total number of items is limited to
// what fits into uint32_t.
uint32_t m_ItemCount;
LessT m_Less;
};

View file

@ -0,0 +1,49 @@
#include "coding/file_writer.hpp"
#include "coding/internal/file_data.hpp"
#include <vector>
// Opens |fileName| in the mode given by |op|; the Op values are cast directly
// to base::FileData::Op.
FileWriter::FileWriter(std::string const & fileName, FileWriter::Op op)
  : m_pFileData(std::make_unique<base::FileData>(fileName, static_cast<base::FileData::Op>(op)))
{
}
// Flushes pending data. May throw, hence noexcept(false).
FileWriter::~FileWriter() noexcept(false)
{
  // The defaulted move constructor leaves the source object with a null
  // m_pFileData; such an object has nothing to flush and must not
  // dereference the pointer.
  if (!m_pFileData)
    return;

  // Note: FileWriter::Flush will be called (like non virtual method).
  Flush();
}
// Current write position, as reported by the underlying file.
uint64_t FileWriter::Pos() const { return m_pFileData->Pos(); }
// Moves the write position of the underlying file to |pos|.
void FileWriter::Seek(uint64_t pos) { m_pFileData->Seek(pos); }
// Writes |size| bytes starting at |p| to the underlying file.
void FileWriter::Write(void const * p, size_t size) { m_pFileData->Write(p, size); }
// Name of the underlying file, as reported by base::FileData.
std::string const & FileWriter::GetName() const { return m_pFileData->GetName(); }
// Size of the underlying file, as reported by base::FileData.
uint64_t FileWriter::Size() const { return m_pFileData->Size(); }
// Forwards to base::FileData::Flush(); may throw.
void FileWriter::Flush() noexcept(false) { m_pFileData->Flush(); }
// Deletes the file |fName|. The result of base::DeleteFileX is deliberately
// ignored.
void FileWriter::DeleteFileX(std::string const & fName) { UNUSED_VALUE(base::DeleteFileX(fName)); }

View file

@ -0,0 +1,91 @@
#pragma once
#include "coding/internal/file_data.hpp"
#include "coding/write_to_sink.hpp"
#include "coding/writer.hpp"
#include "base/assert.hpp"
#include "base/macros.hpp"
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
// FileWriter, not thread safe.
// Writer implementation on top of base::FileData. Movable but not copyable.
class FileWriter : public Writer
{
  DISALLOW_COPY(FileWriter);

public:
  // Values actually match internal FileData::Op enum.
  enum Op
  {
    // Create an empty file for writing. If a file with the same name already exists
    // its content is erased and the file is treated as a new empty file.
    OP_WRITE_TRUNCATE = 1,

    // Open a file for update. The file is created if it does not exist.
    OP_WRITE_EXISTING = 2,

    // Append to a file. Writing operations append data at the end of the file.
    // The file is created if it does not exist.
    // Seek should not be called, if file is opened for append.
    OP_APPEND = 3
  };

  explicit FileWriter(std::string const & fileName, Op operation = OP_WRITE_TRUNCATE);
  FileWriter(FileWriter && rhs) = default;

  // Flushes on destruction and may therefore throw.
  virtual ~FileWriter() noexcept(false);

  // Writer overrides:
  void Seek(uint64_t pos) override;
  uint64_t Pos() const override;
  void Write(void const * p, size_t size) override;

  virtual uint64_t Size() const;
  virtual void Flush() noexcept(false);

  std::string const & GetName() const;

  // Deletes the file |fName|; the outcome is not reported.
  static void DeleteFileX(std::string const & fName);

protected:
  std::unique_ptr<base::FileData> m_pFileData;
};
// FileWriter that can pad the file with zero bytes up to a multiple of a
// given factor.
class FilesContainerWriter : public FileWriter
{
public:
  FilesContainerWriter(std::string const & fileName, Op operation) : FileWriter(fileName, operation) {}

  // Pads as if the data ended at the current file size.
  void WritePaddingByEnd(size_t factor) { WritePadding(Size(), factor); }
  // Pads as if the data ended at the current write position.
  void WritePaddingByPos(size_t factor) { WritePadding(Pos(), factor); }

private:
  // Writes as many zero bytes as are needed to round |offset| up to the next
  // multiple of |factor|; writes nothing when it is already a multiple.
  void WritePadding(uint64_t offset, uint64_t factor)
  {
    ASSERT_GREATER(factor, 1, ());

    uint64_t const aligned = ((offset + factor - 1) / factor) * factor;
    uint64_t const padding = aligned - offset;
    if (padding != 0)
      WriteZeroesToSink(*this, padding);
  }
};
// Opens an existing file for update and, on destruction, cuts the file off at
// the final write position.
class TruncatingFileWriter : public FilesContainerWriter
{
public:
  explicit TruncatingFileWriter(std::string const & fileName)
    : FilesContainerWriter(fileName, FileWriter::OP_WRITE_EXISTING)
  {}

  TruncatingFileWriter(TruncatingFileWriter && rhs) = default;

  // Flushes and truncates; may throw, hence noexcept(false).
  ~TruncatingFileWriter() noexcept(false) override
  {
    // The defaulted move constructor leaves the source object with a null
    // m_pFileData: there is no file to flush or truncate in that case.
    if (!m_pFileData)
      return;

    m_pFileData->Flush();
    m_pFileData->Truncate(Pos());
  }
};

View file

@ -0,0 +1,463 @@
#include "coding/files_container.hpp"
#include "coding/internal/file_data.hpp"
#include "coding/read_write_utils.hpp"
#include "coding/varint.hpp"
#include "coding/write_to_sink.hpp"
#include <cstring>
#include <sstream>
#ifdef OMIM_OS_WINDOWS
#include "std/windows.hpp"
#else
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h> // _SC_PAGESIZE
#endif
#include <errno.h>
// Deserializes one tag record from |src|: the tag, then the offset and the
// size as variable-length unsigned integers.
template <typename Source, typename Info>
void Read(Source & src, Info & i)
{
  rw::Read(src, i.m_tag);

  auto const offset = ReadVarUint<uint64_t>(src);
  i.m_offset = offset;

  auto const size = ReadVarUint<uint64_t>(src);
  i.m_size = size;
}
// Serializes one tag record into |sink| in the layout expected by Read():
// the tag, then the offset and the size as variable-length unsigned integers.
template <typename Sink, typename Info>
void Write(Sink & sink, Info const & i)
{
  rw::Write(sink, i.m_tag);

  // Offset first, size second -- the order must mirror Read().
  WriteVarUint(sink, i.m_offset);
  WriteVarUint(sink, i.m_size);
}
// Formats |info| as "{ tag, offset, size }" for logging.
std::string DebugPrint(FilesContainerBase::TagInfo const & info)
{
  std::ostringstream out;
  out << "{ " << info.m_tag;
  out << ", " << info.m_offset;
  out << ", " << info.m_size;
  out << " }";
  return out.str();
}
/////////////////////////////////////////////////////////////////////////////
// FilesContainerBase
/////////////////////////////////////////////////////////////////////////////
template <typename Reader>
void FilesContainerBase::ReadInfo(Reader & reader)
{
uint64_t offset = ReadPrimitiveFromPos<uint64_t>(reader, 0);
ReaderSource<Reader> src(reader);
src.Skip(offset);
rw::Read(src, m_info);
}
/////////////////////////////////////////////////////////////////////////////
// FilesContainerR
/////////////////////////////////////////////////////////////////////////////
/// Opens the container stored in |filePath| through a FileReader created with the given
/// |logPageSize| / |logPageCount| arguments and loads its section table.
FilesContainerR::FilesContainerR(std::string const & filePath, uint32_t logPageSize, uint32_t logPageCount)
: m_source(std::make_unique<FileReader>(filePath, logPageSize, logPageCount))
{
ReadInfo(m_source);
}
/// Wraps an already opened reader |file| and loads the section table from it.
FilesContainerR::FilesContainerR(TReader const & file) : m_source(file)
{
ReadInfo(m_source);
}
/// Returns a sub-reader limited to the section named |tag|.
/// Throws Reader::OpenException when the container has no such section.
FilesContainerR::TReader FilesContainerR::GetReader(Tag const & tag) const
{
  TagInfo const * const info = GetInfo(tag);
  if (info == nullptr)
    MYTHROW(Reader::OpenException, ("Can't find section:", GetFileName(), tag));

  return m_source.SubReader(info->m_offset, info->m_size);
}
/// Returns (offset, size) of the section |tag|. When the underlying reader is a FileReader,
/// the reader's own offset is added to the section offset; otherwise the base is 0.
/// Throws Reader::OpenException when the container has no such section.
std::pair<uint64_t, uint64_t> FilesContainerR::GetAbsoluteOffsetAndSize(Tag const & tag) const
{
  TagInfo const * const info = GetInfo(tag);
  if (info == nullptr)
    MYTHROW(Reader::OpenException, ("Can't find section:", GetFileName(), tag));

  uint64_t base = 0;
  if (auto const * fileReader = dynamic_cast<FileReader const *>(m_source.GetPtr()))
    base = fileReader->GetOffset();

  return std::make_pair(base + info->m_offset, info->m_size);
}
/// Binary-searches m_info (which must be ordered by tag, see LessInfo) for |tag|.
/// @return pointer to the matching descriptor or nullptr when there is none.
FilesContainerBase::TagInfo const * FilesContainerBase::GetInfo(Tag const & tag) const
{
  auto const it = lower_bound(m_info.begin(), m_info.end(), tag, LessInfo());
  bool const found = (it != m_info.end()) && (it->m_tag == tag);
  return found ? &(*it) : nullptr;
}
namespace detail
{
/////////////////////////////////////////////////////////////////////////////
// MappedFile
/////////////////////////////////////////////////////////////////////////////
/// Closes whatever is currently opened and opens |fName| read-only for later mapping.
/// Windows: opens the file and creates a read-only file mapping object.
/// Other platforms: only opens a file descriptor; the mapping itself is created in Map().
/// Throws Reader::OpenException on failure; on non-Windows platforms
/// Reader::TooManyFilesException is thrown when open() fails with EMFILE or ENFILE.
void MappedFile::Open(std::string const & fName)
{
Close();
#ifdef OMIM_OS_WINDOWS
m_hFile = CreateFileA(fName.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, NULL);
if (m_hFile == INVALID_HANDLE_VALUE)
MYTHROW(Reader::OpenException, ("Can't open file:", fName, "win last error:", GetLastError()));
m_hMapping = CreateFileMappingA(m_hFile, NULL, PAGE_READONLY, 0, 0, NULL);
// NOTE(review): failure is reported as NULL here, while Close() compares m_hMapping
// against INVALID_HANDLE_VALUE - confirm that closing a NULL handle is acceptable.
if (m_hMapping == NULL)
MYTHROW(Reader::OpenException, ("Can't create file's Windows mapping:", fName, "win last error:", GetLastError()));
#else
m_fd = open(fName.c_str(), O_RDONLY | O_NONBLOCK);
if (m_fd == -1)
{
// Running out of file descriptors is reported with a dedicated exception type.
if (errno == EMFILE || errno == ENFILE)
MYTHROW(Reader::TooManyFilesException, ("Can't open file:", fName, ", reason:", strerror(errno)));
else
MYTHROW(Reader::OpenException, ("Can't open file:", fName, ", reason:", strerror(errno)));
}
#endif
}
/// Releases the OS handles acquired by Open() and resets them to their "not opened" values.
/// Safe to call when nothing is opened.
void MappedFile::Close()
{
#ifdef OMIM_OS_WINDOWS
// The mapping object is closed before the file handle it was created from.
if (m_hMapping != INVALID_HANDLE_VALUE)
{
CloseHandle(m_hMapping);
m_hMapping = INVALID_HANDLE_VALUE;
}
if (m_hFile != INVALID_HANDLE_VALUE)
{
CloseHandle(m_hFile);
m_hFile = INVALID_HANDLE_VALUE;
}
#else
if (m_fd != -1)
{
close(m_fd);
m_fd = -1;
}
#endif
}
/// Maps |size| bytes of the opened file starting at |offset| into memory for reading.
/// The mapping starts at |offset| rounded down to the platform alignment (allocation
/// granularity on Windows, page size elsewhere), so the mapped length is |size| plus the
/// rounding remainder. The returned Handle exposes the bytes at |offset| and remembers the
/// aligned base and the full length for unmapping.
/// |tag| is used only in the error message.
/// Throws Reader::OpenException when the mapping can't be created.
MappedFile::Handle MappedFile::Map(uint64_t offset, uint64_t size, std::string const & tag) const
{
#ifdef OMIM_OS_WINDOWS
SYSTEM_INFO sysInfo;
memset(&sysInfo, 0, sizeof(sysInfo));
GetSystemInfo(&sysInfo);
long const align = sysInfo.dwAllocationGranularity;
#else
long const align = sysconf(_SC_PAGESIZE);
#endif
// Round the requested offset down to a multiple of |align|.
uint64_t const alignedOffset = (offset / align) * align;
ASSERT_LESS_OR_EQUAL(alignedOffset, offset, ());
// Extend the length by the bytes skipped because of the rounding.
uint64_t const length = size + (offset - alignedOffset);
ASSERT_GREATER_OR_EQUAL(length, size, ());
#ifdef OMIM_OS_WINDOWS
// The 64-bit offset is passed as separate high and low DWORD parts.
void * pMap =
MapViewOfFile(m_hMapping, FILE_MAP_READ, alignedOffset >> (sizeof(DWORD) * 8), DWORD(alignedOffset), length);
if (pMap == NULL)
MYTHROW(Reader::OpenException,
("Can't map section:", tag, "with [offset, size]:", offset, size, "win last error:", GetLastError()));
#else
void * pMap = mmap(0, static_cast<size_t>(length), PROT_READ, MAP_SHARED, m_fd, static_cast<off_t>(alignedOffset));
if (pMap == MAP_FAILED)
MYTHROW(Reader::OpenException,
("Can't map section:", tag, "with [offset, size]:", offset, size, "errno:", strerror(errno)));
#endif
char const * data = reinterpret_cast<char const *>(pMap);
// |d| points to the first requested byte inside the aligned mapping.
char const * d = data + (offset - alignedOffset);
return Handle(d, data, size, length);
}
} // namespace detail
/////////////////////////////////////////////////////////////////////////////
// FilesMappingContainer
/////////////////////////////////////////////////////////////////////////////
/// Creates the container and immediately attaches it to |fName| (see Open()).
FilesMappingContainer::FilesMappingContainer(std::string const & fName)
{
Open(fName);
}
/// Detaches from the file on destruction.
FilesMappingContainer::~FilesMappingContainer()
{
Close();
}
/// Attaches the container to |fName|: loads the section table, then opens the file for mapping.
void FilesMappingContainer::Open(std::string const & fName)
{
{
// The reader is needed only to load the section table, hence the nested scope.
FileReader reader(fName);
ReadInfo(reader);
}
m_file.Open(fName);
m_name = fName;
}
/// Closes the underlying file and forgets its name.
/// NOTE(review): the section table (m_info) is not cleared here.
void FilesMappingContainer::Close()
{
m_file.Close();
m_name.clear();
}
/// Maps the section named |tag| into memory and returns a handle owning the mapping.
/// Throws Reader::OpenException when the section is missing or can't be mapped.
FilesMappingContainer::Handle FilesMappingContainer::Map(Tag const & tag) const
{
TagInfo const * p = GetInfo(tag);
if (!p)
MYTHROW(Reader::OpenException, ("Can't find section:", m_name, tag));
ASSERT_EQUAL(tag, p->m_tag, ());
return m_file.Map(p->m_offset, p->m_size, tag);
}
/// Returns a regular (non-mapped) FileReader restricted to the section named |tag|.
/// A new FileReader over the container file is created on every call.
/// Throws Reader::OpenException when the container has no such section.
FileReader FilesMappingContainer::GetReader(Tag const & tag) const
{
  TagInfo const * const info = GetInfo(tag);
  if (info == nullptr)
    MYTHROW(Reader::OpenException, ("Can't find section:", m_name, tag));

  return FileReader(m_name).SubReader(info->m_offset, info->m_size);
}
/////////////////////////////////////////////////////////////////////////////
// FilesMappingContainer::Handle
/////////////////////////////////////////////////////////////////////////////
/// Releases the mapping (if any) owned by this handle.
detail::MappedFile::Handle::~Handle()
{
Unmap();
}
/// Takes over the mapping owned by |h|: the current mapping (if any) is released first,
/// then the pointers and sizes are copied and |h| is reset to the empty state so that the
/// mapping is not released twice.
void FilesMappingContainer::Handle::Assign(Handle && h)
{
Unmap();
m_base = h.m_base;
m_origBase = h.m_origBase;
m_size = h.m_size;
m_origSize = h.m_origSize;
h.Reset();
}
/// Releases the mapping owned by this handle, if there is one, and returns the handle to
/// the empty state. The aligned base and full length are what gets unmapped.
void FilesMappingContainer::Handle::Unmap()
{
  if (!IsValid())
    return;

#ifdef OMIM_OS_WINDOWS
  VERIFY(UnmapViewOfFile(m_origBase), ());
#else
  VERIFY(0 == munmap((void *)m_origBase, static_cast<size_t>(m_origSize)), ());
#endif

  Reset();
}
/// Puts the handle into the empty state without touching any mapping.
void FilesMappingContainer::Handle::Reset()
{
  m_base = nullptr;
  m_origBase = nullptr;
  m_size = 0;
  m_origSize = 0;
}
/////////////////////////////////////////////////////////////////////////////
// FilesContainerW
/////////////////////////////////////////////////////////////////////////////
/// Creates a writable container over the file |fName|; |op| selects between starting a
/// new container and continuing an existing one (see Open()).
FilesContainerW::FilesContainerW(std::string const & fName, FileWriter::Op op) : m_name(fName), m_finished(false)
{
Open(op);
}
/// Prepares the container for writing.
/// OP_WRITE_TRUNCATE: nothing is read, so the section table stays as it is.
/// OP_WRITE_EXISTING: the section table is loaded from the file and ordered by offset.
/// Any other value triggers an assertion.
/// When the table ends up empty, a fresh container file is started (StartNew()).
void FilesContainerW::Open(FileWriter::Op op)
{
// Assume the next section has to be written over existing data; StartNew() clears the flag.
m_needRewrite = true;
switch (op)
{
case FileWriter::OP_WRITE_TRUNCATE: break;
case FileWriter::OP_WRITE_EXISTING:
{
// read an existing service info
FileReader reader(m_name);
ReadInfo(reader);
}
// Important: in append mode we should sort info-vector by offsets
sort(m_info.begin(), m_info.end(), LessOffset());
// Check that all offsets are unique
#ifdef DEBUG
for (size_t i = 1; i < m_info.size(); ++i)
ASSERT(m_info[i - 1].m_offset < m_info[i].m_offset || m_info[i - 1].m_size == 0 || m_info[i].m_size == 0, ());
#endif
break;
default: ASSERT(false, ("Unsupported options")); break;
}
if (m_info.empty())
StartNew();
}
/// Starts the container file from scratch: writes an 8-byte zero placeholder where the
/// offset of the section table is stored by Finish(), and clears the rewrite flag.
/// NOTE(review): relies on FileWriter's default open mode - presumably truncating; confirm.
void FilesContainerW::StartNew()
{
// leave space for offset to service info
FileWriter writer(m_name);
uint64_t skip = 0;
writer.Write(&skip, sizeof(skip));
m_needRewrite = false;
}
/// Finalizes the container on destruction unless Finish() has already been called.
FilesContainerW::~FilesContainerW()
{
if (!m_finished)
Finish();
}
uint64_t FilesContainerW::SaveCurrentSize()
{
ASSERT(!m_finished, ());
uint64_t const curr = FileReader(m_name).Size();
if (!m_info.empty())
m_info.back().m_size = curr - m_info.back().m_offset;
return curr;
}
/// Removes the section |tag| by copying every other section into "<name>.tmp",
/// replacing the original file with it and re-reading the section table.
/// Throws RootException when the original file can't be deleted or the temporary one
/// can't be renamed.
void FilesContainerW::DeleteSection(Tag const & tag)
{
  {
    // Both containers live only inside this scope, so they are destroyed before the
    // files are swapped below.
    FilesContainerR contR(m_name);
    FilesContainerW contW(m_name + ".tmp");

    for (TagInfo const & info : m_info)
    {
      if (info.m_tag == tag)
        continue;
      contW.Write(contR.GetReader(info.m_tag), info.m_tag);
    }
  }

  // Replace the original file with the rewritten one.
  if (!base::DeleteFileX(m_name) || !base::RenameFileX(m_name + ".tmp", m_name))
    MYTHROW(RootException, ("Can't rename file", m_name, "Sharing violation or disk error!"));

  // Re-open to refresh m_info from the new file.
  Open(FileWriter::OP_WRITE_EXISTING);
}
/// Returns a writer positioned at the start of a new section named |tag|.
/// If a section with this tag already exists it is dropped first: the last section is
/// simply forgotten (its bytes get overwritten), any other section requires rewriting
/// the whole file (DeleteSection()).
/// The new section is registered in m_info at an offset padded to kSectionAlignment.
std::unique_ptr<FilesContainerWriter> FilesContainerW::GetWriter(Tag const & tag)
{
ASSERT(!m_finished, ());
InfoContainer::const_iterator it = find_if(m_info.begin(), m_info.end(), EqualTag(tag));
if (it != m_info.end())
{
if (it + 1 == m_info.end())
{
// The section is the last entry of m_info: forget it and overwrite its data in place.
m_info.pop_back();
if (m_info.empty())
StartNew();
else
m_needRewrite = true;
}
else
{
// DeleteSection() re-opens the container, which reloads m_info.
DeleteSection(it->m_tag);
}
}
if (m_needRewrite)
{
m_needRewrite = false;
ASSERT(!m_info.empty(), ());
// Continue right after the last known section; TruncatingFileWriter cuts off
// whatever is left behind its final position when it is destroyed.
uint64_t const curr = m_info.back().m_offset + m_info.back().m_size;
auto writer = std::make_unique<TruncatingFileWriter>(m_name);
writer->Seek(curr);
writer->WritePaddingByPos(kSectionAlignment);
m_info.emplace_back(tag, writer->Pos());
ASSERT_EQUAL(m_info.back().m_offset % kSectionAlignment, 0, ());
return writer;
}
else
{
// Plain append: fix the size of the previous section, then add the new one at the end.
SaveCurrentSize();
auto writer = std::make_unique<FilesContainerWriter>(m_name, FileWriter::OP_APPEND);
writer->WritePaddingByPos(kSectionAlignment);
m_info.emplace_back(tag, writer->Pos());
ASSERT_EQUAL(m_info.back().m_offset % kSectionAlignment, 0, ());
return writer;
}
}
/// Writes the whole content of the file |fPath| as the section |tag|.
void FilesContainerW::Write(std::string const & fPath, Tag const & tag)
{
Write(ModelReaderPtr(std::make_unique<FileReader>(fPath)), tag);
}
/// Copies everything available from |reader| into the section |tag|.
void FilesContainerW::Write(ModelReaderPtr reader, Tag const & tag)
{
ReaderSource<ModelReaderPtr> src(reader);
auto writer = GetWriter(tag);
rw::ReadAndWrite(src, *writer);
}
/// Writes |size| bytes starting at |buffer| as the section |tag|.
/// Nothing happens (no section is created) when |size| is zero.
void FilesContainerW::Write(void const * buffer, size_t size, Tag const & tag)
{
  if (size == 0)
    return;

  GetWriter(tag)->Write(buffer, size);
}
/// Writes the bytes of |buffer| as the section |tag| (forwards to the pointer/size overload).
void FilesContainerW::Write(std::vector<char> const & buffer, Tag const & tag)
{
Write(buffer.data(), buffer.size(), tag);
}
/// Writes the bytes of |buffer| as the section |tag| (forwards to the pointer/size overload).
void FilesContainerW::Write(std::vector<uint8_t> const & buffer, Tag const & tag)
{
Write(buffer.data(), buffer.size(), tag);
}
/// Finalizes the container: fixes the size of the last section, stores the position of
/// the section table in the first bytes of the file and appends the table, ordered by
/// tag, at the current end of the file.
void FilesContainerW::Finish()
{
ASSERT(!m_finished, ());
uint64_t const curr = SaveCurrentSize();
FileWriter writer(m_name, FileWriter::OP_WRITE_EXISTING);
// The header at position 0 holds the offset of the section table.
writer.Seek(0);
WriteToSink(writer, curr);
writer.Seek(curr);
// Readers look sections up by tag with a binary search, so the table is stored sorted by tag.
sort(m_info.begin(), m_info.end(), LessInfo());
rw::Write(writer, m_info);
m_finished = true;
}

View file

@ -0,0 +1,253 @@
#pragma once
#include "coding/file_reader.hpp"
#include "coding/file_writer.hpp"
#include "base/assert.hpp"
#include "base/macros.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
/// Shared base of the container readers and writers: keeps the table of section
/// descriptors (m_info) together with the comparators and lookups working on it.
class FilesContainerBase
{
public:
  using Tag = std::string;

  /// Descriptor of a single section: its name and byte range.
  struct TagInfo
  {
    TagInfo() = default;
    TagInfo(Tag const & tag, uint64_t offset) : m_tag(tag), m_offset(offset) {}

    Tag m_tag;
    uint64_t m_offset = 0;
    uint64_t m_size = 0;
  };

  /// Alignment of each new section that will be added to a file
  /// container, i.e. section's offset in bytes will be a multiple of
  /// this value.
  ///
  /// WARNING! Existing sections may not be properly aligned.
  static uint64_t constexpr kSectionAlignment = 8;

  /// @return true when a section named |tag| is present in the table.
  bool IsExist(Tag const & tag) const { return GetInfo(tag) != nullptr; }

  /// Calls |toDo| with every section descriptor, in the current order of the table.
  template <typename ToDo>
  void ForEachTagInfo(ToDo && toDo) const
  {
    std::for_each(m_info.begin(), m_info.end(), std::forward<ToDo>(toDo));
  }

protected:
  /// Orders descriptors by tag; also compares a descriptor with a bare tag.
  struct LessInfo
  {
    bool operator()(TagInfo const & t1, TagInfo const & t2) const { return t1.m_tag < t2.m_tag; }
    bool operator()(TagInfo const & t1, Tag const & t2) const { return t1.m_tag < t2; }
    bool operator()(Tag const & t1, TagInfo const & t2) const { return t1 < t2.m_tag; }
  };

  /// Orders descriptors by offset; also compares a descriptor with a bare offset.
  struct LessOffset
  {
    bool operator()(TagInfo const & t1, TagInfo const & t2) const
    {
      if (t1.m_offset != t2.m_offset)
        return t1.m_offset < t2.m_offset;

      // Element with nonzero size should be the last one,
      // for correct append writer mode (FilesContainerW::GetWriter).
      return t1.m_size < t2.m_size;
    }
    bool operator()(TagInfo const & t1, uint64_t const & t2) const { return t1.m_offset < t2; }
    bool operator()(uint64_t const & t1, TagInfo const & t2) const { return t1 < t2.m_offset; }
  };

  /// Predicate matching descriptors with the given tag.
  /// Keeps a reference to the tag, so the tag must outlive the predicate.
  class EqualTag
  {
  public:
    EqualTag(Tag const & tag) : m_tag(tag) {}
    bool operator()(TagInfo const & t) const { return t.m_tag == m_tag; }

  private:
    Tag const & m_tag;
  };

  /// @return descriptor of the section |tag| or a null pointer (see IsExist()).
  TagInfo const * GetInfo(Tag const & tag) const;

  /// Loads the section table from |reader| into m_info.
  template <typename Reader>
  void ReadInfo(Reader & reader);

  using InfoContainer = std::vector<TagInfo>;
  InfoContainer m_info;
};
std::string DebugPrint(FilesContainerBase::TagInfo const & info);
class FilesContainerR : public FilesContainerBase
{
public:
using TReader = ModelReaderPtr;
explicit FilesContainerR(std::string const & filePath, uint32_t logPageSize = 10, uint32_t logPageCount = 10);
explicit FilesContainerR(TReader const & file);
TReader GetReader(Tag const & tag) const;
template <typename F>
void ForEachTag(F && f) const
{
for (size_t i = 0; i < m_info.size(); ++i)
f(m_info[i].m_tag);
}
uint64_t GetFileSize() const { return m_source.Size(); }
std::string const & GetFileName() const { return m_source.GetName(); }
std::pair<uint64_t, uint64_t> GetAbsoluteOffsetAndSize(Tag const & tag) const;
private:
TReader m_source;
};
namespace detail
{
/// Read-only file opened for memory mapping. Individual byte ranges are mapped with
/// Map(); every mapping is owned by a move-only Handle.
class MappedFile
{
public:
MappedFile() = default;
~MappedFile() { Close(); }
void Open(std::string const & fName);
void Close();
/// Move-only owner of one mapped range; the range is unmapped when the handle is
/// destroyed, reassigned or explicitly unmapped.
class Handle
{
public:
Handle() = default;
/// @param base       first byte of the requested range
/// @param alignBase  start of the underlying mapping (passed to the unmap call)
/// @param size       size of the requested range
/// @param origSize   full size of the underlying mapping
Handle(char const * base, char const * alignBase, uint64_t size, uint64_t origSize)
: m_base(base)
, m_origBase(alignBase)
, m_size(size)
, m_origSize(origSize)
{}
Handle(Handle && h) { Assign(std::move(h)); }
Handle & operator=(Handle && h)
{
Assign(std::move(h));
return *this;
}
~Handle();
/// Takes over the mapping owned by |h|, leaving |h| empty.
void Assign(Handle && h);
void Unmap();
bool IsValid() const { return (m_base != 0); }
uint64_t GetSize() const { return m_size; }
/// Views the mapped bytes as an array of T; the size must be a multiple of sizeof(T).
template <typename T>
T const * GetData() const
{
ASSERT_EQUAL(m_size % sizeof(T), 0, ());
return reinterpret_cast<T const *>(m_base);
}
/// Number of T elements in the mapped range; the size must be a multiple of sizeof(T).
template <typename T>
size_t GetDataCount() const
{
ASSERT_EQUAL(m_size % sizeof(T), 0, ());
return (m_size / sizeof(T));
}
private:
void Reset();
char const * m_base = nullptr;
char const * m_origBase = nullptr;
uint64_t m_size = 0;
uint64_t m_origSize = 0;
DISALLOW_COPY(Handle);
};
/// Maps |size| bytes at |offset|; |tag| is used for diagnostics.
Handle Map(uint64_t offset, uint64_t size, std::string const & tag) const;
private:
#ifdef OMIM_OS_WINDOWS
// Stored as void * initialized to (void *)-1, the "not opened" value.
void * m_hFile = (void *)-1;
void * m_hMapping = (void *)-1;
#else
// -1 means "not opened".
int m_fd = -1;
#endif
DISALLOW_COPY(MappedFile);
};
} // namespace detail
/// Container reader that exposes sections as memory mappings (Map()) or as ordinary
/// file readers (GetReader()).
class FilesMappingContainer : public FilesContainerBase
{
public:
using Handle = detail::MappedFile::Handle;
/// Do nothing by default, call Open to attach to file.
FilesMappingContainer() = default;
explicit FilesMappingContainer(std::string const & fName);
~FilesMappingContainer();
void Open(std::string const & fName);
void Close();
/// Maps the section |tag| into memory.
Handle Map(Tag const & tag) const;
/// @return file reader restricted to the section |tag|.
FileReader GetReader(Tag const & tag) const;
std::string const & GetName() const { return m_name; }
private:
std::string m_name;
detail::MappedFile m_file;
};
/// Container writer: sections are added one by one with GetWriter()/Write(), and the
/// section table is stored by Finish(), which the destructor calls if it was not
/// called explicitly.
class FilesContainerW : public FilesContainerBase
{
public:
FilesContainerW(std::string const & fName, FileWriter::Op op = FileWriter::OP_WRITE_TRUNCATE);
~FilesContainerW();
/// @return writer for a new section |tag|; an existing section with the same tag is replaced.
std::unique_ptr<FilesContainerWriter> GetWriter(Tag const & tag);
void Write(std::string const & fPath, Tag const & tag);
void Write(ModelReaderPtr reader, Tag const & tag);
void Write(void const * buffer, size_t size, Tag const & tag);
void Write(std::vector<char> const & buffer, Tag const & tag);
void Write(std::vector<uint8_t> const & buffer, Tag const & tag);
/// Stores the section table; no sections may be added afterwards.
void Finish();
/// Delete section with rewriting file.
/// @precondition Container should be opened with FileWriter::OP_WRITE_EXISTING.
void DeleteSection(Tag const & tag);
std::string const & GetFileName() const { return m_name; }
private:
uint64_t SaveCurrentSize();
void Open(FileWriter::Op op);
void StartNew();
std::string m_name;
// True when the next section has to be written over existing data rather than appended.
bool m_needRewrite;
// True after Finish() has stored the section table.
bool m_finished;
};

View file

@ -0,0 +1,179 @@
#pragma once
#include "bit_streams.hpp"
#include "byte_stream.hpp"
#include "dd_vector.hpp"
#include "reader.hpp"
#include "write_to_sink.hpp"
#include <algorithm>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
/// Disk driven vector for optimal storing small values with rare big values.
/// Format:
///   sizeof(TSize) bytes (4 by default) to store vector's size
///   Buffer of ceil(Size * Bits / 8) bytes (but not less than sizeof(uint32_t) bytes),
///   e.g. vector of Bits-sized elements.
///     - values in range [0, (1 << Bits) - 3] stored as is
///     - value (1 << Bits) - 2 tells that actual value is stored in the exceptions table below.
///     - value (1 << Bits) - 1 tells that the value is undefined.
///   Buffer with exceptions table, e.g. vector of (index, value) pairs till the end of the reader,
///   sorted by index parameter.
/// Component is stored and used in host's endianness, without any conversions.
template <size_t Bits,                /// number of fixed bits
          class TReader,              /// reader with random offset read functions
          typename TSize = uint32_t,  /// vector index type (platform independent)
          typename TValue = uint32_t  /// vector value type (platform independent)
          >
class FixedBitsDDVector
{
  static_assert(std::is_unsigned<TSize>::value, "");
  static_assert(std::is_unsigned<TValue>::value, "");
  // 16 - is the maximum bits count to get all needed bits in random access within uint32_t.
  static_assert(Bits > 0, "");
  static_assert(Bits <= 16, "");

  using TSelf = FixedBitsDDVector<Bits, TReader, TSize, TValue>;

  /// Entry of the exceptions table.
  struct IndexValue
  {
    TSize m_index;
    TValue m_value;
    bool operator<(IndexValue const & rhs) const { return m_index < rhs.m_index; }
  };

  TReader m_bits;
  DDVector<IndexValue, TReader, TSize> m_vector;

#ifdef DEBUG
  TSize const m_size;
#endif

  using TBlock = uint32_t;

  /// The bits buffer is never shorter than one TBlock, so that Get() can always read a whole block.
  static uint64_t AlignBytesCount(uint64_t count) { return std::max(count, static_cast<uint64_t>(sizeof(TBlock))); }

  static TBlock constexpr kMask = (1 << Bits) - 1;
  static TBlock constexpr kLargeValue = kMask - 1;
  static TBlock constexpr kUndefined = kMask;

  /// Looks up the value of the element |index| in the exceptions table.
  TValue FindInVector(TSize index) const
  {
    auto const it = std::lower_bound(m_vector.begin(), m_vector.end(), IndexValue{index, 0});
    ASSERT(it != m_vector.end() && it->m_index == index, ());
    return it->m_value;
  }

  FixedBitsDDVector(TReader const & bitsReader, TReader const & vecReader, TSize size)
    : m_bits(bitsReader)
    , m_vector(vecReader)
#ifdef DEBUG
    , m_size(size)
#endif
  {}

public:
  /// Creates a vector over |reader| that holds data in the format described above.
  static std::unique_ptr<TSelf> Create(TReader const & reader)
  {
    TSize const size = ReadPrimitiveFromPos<TSize>(reader, 0);

    uint64_t const off1 = sizeof(TSize);
    // Widen before multiplying: with 32-bit TSize and size_t the product could overflow.
    uint64_t const bitsCount = static_cast<uint64_t>(size) * Bits;
    uint64_t const off2 = AlignBytesCount((bitsCount + CHAR_BIT - 1) / CHAR_BIT) + off1;

    // We cannot use make_unique here because constructor is private.
    return std::unique_ptr<TSelf>(
        new TSelf(reader.SubReader(off1, off2 - off1), reader.SubReader(off2, reader.Size() - off2), size));
  }

  /// Reads the element |index| into |value|.
  /// @return false (leaving |value| untouched) when the element is stored as undefined.
  bool Get(TSize index, TValue & value) const
  {
    ASSERT_LESS(index, m_size, ());

    // Widen before multiplying: with 32-bit TSize and size_t the product could overflow.
    uint64_t const bitsOffset = static_cast<uint64_t>(index) * Bits;
    uint64_t bytesOffset = bitsOffset / CHAR_BIT;

    // A whole block is always read, so near the end of the buffer the read position
    // is moved back and the shift below grows accordingly.
    size_t constexpr kBlockSize = sizeof(TBlock);
    if (bytesOffset + kBlockSize > m_bits.Size())
      bytesOffset = m_bits.Size() - kBlockSize;

    TBlock v = ReadPrimitiveFromPos<TBlock>(m_bits, bytesOffset);
    v >>= (bitsOffset - bytesOffset * CHAR_BIT);
    v &= kMask;
    if (v == kUndefined)
      return false;

    value = v < kLargeValue ? v : FindInVector(index);
    return true;
  }

  /// Accumulates values in memory and serializes them to |TWriter| in its destructor.
  template <class TWriter>
  class Builder
  {
    using TData = std::vector<uint8_t>;
    using TempWriter = PushBackByteSink<TData>;
    using TBits = BitWriter<TempWriter>;

    TData m_data;
    TempWriter m_writer;
    std::unique_ptr<TBits> m_bits;

    std::vector<IndexValue> m_excepts;
    TSize m_count = 0;
    TSize m_optCount = 0;

    TWriter & m_finalWriter;

  public:
    using ValueType = TValue;

    explicit Builder(TWriter & writer) : m_writer(m_data), m_bits(new TBits(m_writer)), m_finalWriter(writer) {}

    ~Builder()
    {
      // Final serialization is in dtor only.
      // You can't do any intermediate flushes during building vector.

      // Reset the bit stream first.
      m_bits.reset();

      // Write size of vector.
      WriteToSink(m_finalWriter, m_count);

      // Write bits vector, extended to at least 4 bytes.
      m_data.resize(AlignBytesCount(m_data.size()));
      m_finalWriter.Write(m_data.data(), m_data.size());

      // Write exceptions table.
      m_finalWriter.Write(m_excepts.data(), m_excepts.size() * sizeof(IndexValue));
    }

    /// Appends |v|; values that do not fit below kLargeValue go to the exceptions table.
    void PushBack(TValue v)
    {
      if (v >= kLargeValue)
      {
        m_bits->WriteAtMost32Bits(kLargeValue, Bits);
        m_excepts.push_back({m_count, v});
      }
      else
      {
        ++m_optCount;
        m_bits->WriteAtMost32Bits(v, Bits);
      }

      ++m_count;
    }

    // Pushes a special (undefined) value.
    void PushBackUndefined()
    {
      m_bits->WriteAtMost32Bits(kUndefined, Bits);
      ++m_optCount;
      ++m_count;
    }

    /// @return (number of stored as-is elements, number of all elements)
    std::pair<TSize, TSize> GetCount() const { return std::make_pair(m_optCount, m_count); }
  };
};

View file

@ -0,0 +1,516 @@
#include "coding/geometry_coding.hpp"
#include "coding/point_coding.hpp"
#include "geometry/mercator.hpp"
#include "base/assert.hpp"
#include <complex>
#include <stack>
namespace
{
/// Clamps each coordinate of |point| to [0, maxPoint] and converts the result to
/// unsigned integer coordinates.
inline m2::PointU ClampPoint(m2::PointD const & maxPoint, m2::PointD const & point)
{
  using uvalue_t = m2::PointU::value_type;

  auto const x = static_cast<uvalue_t>(math::Clamp(point.x, 0.0, maxPoint.x));
  auto const y = static_cast<uvalue_t>(math::Clamp(point.y, 0.0, maxPoint.y));
  return {x, y};
}
/// Orders edges by their first node (m_p[0]), then by side; also compares an edge
/// with a bare node index, which allows binary search by node.
struct edge_less_p0
{
  using edge_t = tesselator::Edge;

  bool operator()(edge_t const & e1, edge_t const & e2) const
  {
    if (e1.m_p[0] == e2.m_p[0])
      return e1.m_side < e2.m_side;
    return e1.m_p[0] < e2.m_p[0];
  }
  bool operator()(edge_t const & e1, int e2) const { return e1.m_p[0] < e2; }
  bool operator()(int e1, edge_t const & e2) const { return e1 < e2.m_p[0]; }
};
} // namespace
namespace coding
{
/// Debug helper: decodes |deltas| with |fnDecode| and asserts that the result is equal to
/// |points| element by element.
/// @return always true, so the call can be wrapped into ASSERT(...).
bool TestDecoding(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutDeltasT const & deltas,
void (*fnDecode)(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutPointsT & points))
{
size_t const count = points.size();
std::vector<m2::PointU> decoded;
decoded.resize(count);
// |decodedA| is the output adapter over |decoded| that the decoder writes through.
OutPointsT decodedA(decoded);
fnDecode(make_read_adapter(deltas), basePoint, maxPoint, decodedA);
for (size_t i = 0; i < count; ++i)
ASSERT_EQUAL(points[i], decoded[i], ());
return true;
}
/// Predicts the next polyline point from the two previous ones: continues from |p1| by
/// half of the (p1 - p2) step and clamps the result to [0, maxPoint].
m2::PointU PredictPointInPolyline(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2)
{
  m2::PointD const halfStep = (m2::PointD(p1) - m2::PointD(p2)) / 2.0;
  return ClampPoint(maxPoint, m2::PointD(p1) + halfStep);
}
/// Encodes the difference between |actual| and |prediction| as one integer:
/// each coordinate difference is zigzag-encoded and the two results are merged
/// with bits::BitwiseMerge.
uint64_t EncodePointDeltaAsUint(m2::PointU const & actual, m2::PointU const & prediction)
{
  int32_t const dx = static_cast<int32_t>(actual.x) - static_cast<int32_t>(prediction.x);
  int32_t const dy = static_cast<int32_t>(actual.y) - static_cast<int32_t>(prediction.y);
  return bits::BitwiseMerge(bits::ZigZagEncode(dx), bits::ZigZagEncode(dy));
}
/// Inverse of EncodePointDeltaAsUint: splits |delta| into two zigzag-encoded coordinate
/// differences and applies them to |prediction|.
m2::PointU DecodePointDeltaFromUint(uint64_t delta, m2::PointU const & prediction)
{
  uint32_t encodedX, encodedY;
  bits::BitwiseSplit(delta, encodedX, encodedY);

  auto const x = prediction.x + bits::ZigZagDecode(encodedX);
  auto const y = prediction.y + bits::ZigZagDecode(encodedY);
  return m2::PointU(x, y);
}
/// Predicts the next polyline point from the three previous ones (|p1| is the latest).
/// Points are treated as complex numbers: the last step (p1 - p2) is halved and rotated
/// by half of the angle between the last two steps, then added to |p1|.
/// The result is clamped to [0, maxPoint].
/// @precondition p2 != p3 (checked), otherwise the division below is by zero.
m2::PointU PredictPointInPolyline(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2,
m2::PointU const & p3)
{
CHECK_NOT_EQUAL(p2, p3, ());
using std::complex;
complex<double> const c1(p1.x, p1.y);
complex<double> const c2(p2.x, p2.y);
complex<double> const c3(p3.x, p3.y);
// arg(d) is the turn angle between step (p2 - p3) and step (p1 - p2).
complex<double> const d = (c1 - c2) / (c2 - c3);
complex<double> const c0 = c1 + (c1 - c2) * std::polar(0.5, 0.5 * arg(d));
// Alternative variant kept for reference (disabled).
/*
complex<double> const c1(p1.x, p1.y);
complex<double> const c2(p2.x, p2.y);
complex<double> const c3(p3.x, p3.y);
complex<double> const d = (c1 - c2) / (c2 - c3);
complex<double> const c01 = c1 + (c1 - c2) * polar(0.5, arg(d));
complex<double> const c02 = c1 + (c1 - c2) * complex<double>(0.5, 0.0);
complex<double> const c0 = (c01 + c02) * complex<double>(0.5, 0.0);
*/
return ClampPoint(maxPoint, {c0.real(), c0.imag()});
}
/// Predicts the third vertex of a new triangle sharing the edge (p1, p2) with the
/// triangle (p1, p2, p3): the point p1 + p2 - p3, clamped to [0, maxPoint].
m2::PointU PredictPointInTriangle(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2,
m2::PointU const & p3)
{
// parallelogram prediction
return ClampPoint(maxPoint, m2::PointD(p1 + p2) - m2::PointD(p3));
}
/// Encodes |points| as deltas where every point is predicted by its predecessor
/// (the first one by |basePoint|). |maxPoint| is used only by the debug self-check.
void EncodePolylinePrev1(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutDeltasT & deltas)
{
  size_t const count = points.size();
  for (size_t i = 0; i < count; ++i)
  {
    m2::PointU const prediction = (i == 0) ? basePoint : points[i - 1];
    deltas.push_back(EncodePointDeltaAsUint(points[i], prediction));
  }

  ASSERT(TestDecoding(points, basePoint, maxPoint, deltas, &DecodePolylinePrev1), ());
}
/// Inverse of EncodePolylinePrev1: every point is restored from its delta and the
/// previously decoded point (the first one from |basePoint|).
void DecodePolylinePrev1(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & /*maxPoint*/,
                         OutPointsT & points)
{
  size_t const count = deltas.size();
  for (size_t i = 0; i < count; ++i)
  {
    m2::PointU const prediction = (i == 0) ? basePoint : points.back();
    points.push_back(DecodePointDeltaFromUint(deltas[i], prediction));
  }
}
/// Encodes |points| as deltas: the first point is predicted by |basePoint|, the second
/// by the first, every other one by the two-point polyline prediction.
void EncodePolylinePrev2(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutDeltasT & deltas)
{
  size_t const count = points.size();

  if (count >= 1)
    deltas.push_back(EncodePointDeltaAsUint(points[0], basePoint));

  if (count >= 2)
  {
    deltas.push_back(EncodePointDeltaAsUint(points[1], points[0]));

    m2::PointD const maxPointD(maxPoint);
    for (size_t i = 2; i < count; ++i)
    {
      m2::PointU const prediction = PredictPointInPolyline(maxPointD, points[i - 1], points[i - 2]);
      deltas.push_back(EncodePointDeltaAsUint(points[i], prediction));
    }
  }

  ASSERT(TestDecoding(points, basePoint, maxPoint, deltas, &DecodePolylinePrev2), ());
}
/// Inverse of EncodePolylinePrev2: restores points from |deltas| using the same
/// predictions computed from already decoded points.
void DecodePolylinePrev2(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutPointsT & points)
{
  size_t const count = deltas.size();
  if (count == 0)
    return;

  points.push_back(DecodePointDeltaFromUint(deltas[0], basePoint));
  if (count == 1)
    return;

  m2::PointD const maxPointD(maxPoint);
  points.push_back(DecodePointDeltaFromUint(deltas[1], points.back()));

  for (size_t i = 2; i < count; ++i)
  {
    size_t const n = points.size();
    m2::PointU const prediction = PredictPointInPolyline(maxPointD, points[n - 1], points[n - 2]);
    points.push_back(DecodePointDeltaFromUint(deltas[i], prediction));
  }
}
/// Encodes |points| as deltas: the first point is predicted by |basePoint|, the second
/// by the first, the third by the two-point prediction and every other one by the
/// three-point prediction.
void EncodePolylinePrev3(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutDeltasT & deltas)
{
  ASSERT_LESS_OR_EQUAL(basePoint.x, maxPoint.x, (basePoint, maxPoint));
  ASSERT_LESS_OR_EQUAL(basePoint.y, maxPoint.y, (basePoint, maxPoint));

  size_t const count = points.size();

  if (count >= 1)
    deltas.push_back(EncodePointDeltaAsUint(points[0], basePoint));

  if (count >= 2)
    deltas.push_back(EncodePointDeltaAsUint(points[1], points[0]));

  if (count >= 3)
  {
    m2::PointD const maxPointD(maxPoint);

    m2::PointU const thirdPrediction = PredictPointInPolyline(maxPointD, points[1], points[0]);
    deltas.push_back(EncodePointDeltaAsUint(points[2], thirdPrediction));

    for (size_t i = 3; i < count; ++i)
    {
      m2::PointU const prediction = PredictPointInPolyline(maxPointD, points[i - 1], points[i - 2], points[i - 3]);
      deltas.push_back(EncodePointDeltaAsUint(points[i], prediction));
    }
  }

  ASSERT(TestDecoding(points, basePoint, maxPoint, deltas, &DecodePolylinePrev3), ());
}
/// Inverse of EncodePolylinePrev3: restores points from |deltas| using the same
/// predictions computed from already decoded points.
void DecodePolylinePrev3(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutPointsT & points)
{
  ASSERT_LESS_OR_EQUAL(basePoint.x, maxPoint.x, (basePoint, maxPoint));
  ASSERT_LESS_OR_EQUAL(basePoint.y, maxPoint.y, (basePoint, maxPoint));

  size_t const count = deltas.size();
  if (count == 0)
    return;

  points.push_back(DecodePointDeltaFromUint(deltas[0], basePoint));
  if (count == 1)
    return;

  m2::PointU const pt0 = points.back();
  points.push_back(DecodePointDeltaFromUint(deltas[1], pt0));
  if (count == 2)
    return;

  m2::PointD const maxPointD(maxPoint);
  points.push_back(DecodePointDeltaFromUint(deltas[2], PredictPointInPolyline(maxPointD, points.back(), pt0)));

  for (size_t i = 3; i < count; ++i)
  {
    size_t const n = points.size();
    m2::PointU const prediction = PredictPointInPolyline(maxPointD, points[n - 1], points[n - 2], points[n - 3]);
    points.push_back(DecodePointDeltaFromUint(deltas[i], prediction));
  }
}
/// Default polyline encoder; currently forwards to the Prev2 scheme.
void EncodePolyline(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutDeltasT & deltas)
{
EncodePolylinePrev2(points, basePoint, maxPoint, deltas);
}
/// Default polyline decoder; forwards to the Prev2 scheme, matching EncodePolyline.
void DecodePolyline(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutPointsT & points)
{
DecodePolylinePrev2(deltas, basePoint, maxPoint, points);
}
/// Encodes a triangle strip as deltas: the first three points are chained one after
/// another starting from |basePoint|, every other point is predicted from the three
/// preceding points with the parallelogram rule.
/// @precondition |points| is empty or has at least 3 elements.
void EncodeTriangleStrip(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutDeltasT & deltas)
{
  size_t const count = points.size();
  if (count == 0)
    return;

  ASSERT_GREATER(count, 2, ());

  deltas.push_back(EncodePointDeltaAsUint(points[0], basePoint));
  deltas.push_back(EncodePointDeltaAsUint(points[1], points[0]));
  deltas.push_back(EncodePointDeltaAsUint(points[2], points[1]));

  m2::PointD const maxPointD(maxPoint);
  for (size_t i = 3; i < count; ++i)
  {
    m2::PointU const prediction = PredictPointInTriangle(maxPointD, points[i - 1], points[i - 2], points[i - 3]);
    deltas.push_back(EncodePointDeltaAsUint(points[i], prediction));
  }
}
/// Inverse of EncodeTriangleStrip: restores the strip points from |deltas| using the
/// same predictions computed from already decoded points.
/// @precondition |deltas| is empty or has at least 3 elements.
void DecodeTriangleStrip(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
                         OutPointsT & points)
{
  size_t const count = deltas.size();
  if (count == 0)
    return;

  ASSERT_GREATER(count, 2, ());

  points.push_back(DecodePointDeltaFromUint(deltas[0], basePoint));
  points.push_back(DecodePointDeltaFromUint(deltas[1], points.back()));
  points.push_back(DecodePointDeltaFromUint(deltas[2], points.back()));

  m2::PointD const maxPointD(maxPoint);
  for (size_t i = 3; i < count; ++i)
  {
    size_t const n = points.size();
    m2::PointU const prediction = PredictPointInTriangle(maxPointD, points[n - 1], points[n - 2], points[n - 3]);
    points.push_back(DecodePointDeltaFromUint(deltas[i], prediction));
  }
}
} // namespace coding
namespace serial
{
// GeometryCodingParams ----------------------------------------------------------------------------
/// Default parameters: kPointCoordBits bits per coordinate and the base point decoded
/// from the packed value 0.
GeometryCodingParams::GeometryCodingParams() : m_BasePointUint64(0), m_CoordBits(kPointCoordBits)
{
m_BasePoint = Uint64ToPointUObsolete(m_BasePointUint64);
}
/// Parameters with |coordBits| bits per coordinate and the base point taken from the
/// floating-point point |pt| (see SetBasePoint()).
GeometryCodingParams::GeometryCodingParams(uint8_t coordBits, m2::PointD const & pt) : m_CoordBits(coordBits)
{
SetBasePoint(pt);
}
/// Parameters with |coordBits| bits per coordinate and the base point given in its
/// packed uint64 form.
GeometryCodingParams::GeometryCodingParams(uint8_t coordBits, uint64_t basePointUint64)
: m_BasePointUint64(basePointUint64)
, m_CoordBits(coordBits)
{
m_BasePoint = Uint64ToPointUObsolete(m_BasePointUint64);
}
/// Sets the base point from |pt|: converts it to integer coordinates with the current
/// number of coordinate bits and keeps the packed uint64 form in sync.
void GeometryCodingParams::SetBasePoint(m2::PointD const & pt)
{
m_BasePoint = PointDToPointU(pt, m_CoordBits);
m_BasePointUint64 = PointUToUint64Obsolete(m_BasePoint);
}
namespace pts
{
/// Converts a floating-point point to integer coordinates with |coordBits| bits per coordinate.
m2::PointU D2U(m2::PointD const & p, uint32_t coordBits)
{
return PointDToPointU(p, coordBits);
}
/// Converts a point with integer coordinates (|coordBits| bits per coordinate) back to
/// floating-point coordinates.
/// In debug builds asserts that the result lies inside the mercator bounds:
/// x in [kMinX, kMaxX] and y in [kMinY, kMaxY].
m2::PointD U2D(m2::PointU const & p, uint32_t coordBits)
{
  m2::PointD const pt = PointUToPointD(p, coordBits);
  // Each coordinate is checked against its own pair of bounds.
  ASSERT(mercator::Bounds::kMinX <= pt.x && pt.x <= mercator::Bounds::kMaxX, (p, pt, coordBits));
  ASSERT(mercator::Bounds::kMinY <= pt.y && pt.y <= mercator::Bounds::kMaxY, (p, pt, coordBits));
  return pt;
}
/// Returns the integer-coordinate image of the top-right corner of the mercator bounds
/// for the number of coordinate bits stored in |params|.
m2::PointU GetMaxPoint(GeometryCodingParams const & params)
{
  m2::PointD const maxMercator(mercator::Bounds::kMaxX, mercator::Bounds::kMaxY);
  return D2U(maxMercator, params.GetCoordBits());
}
/// Returns the base point stored in |params|.
m2::PointU GetBasePoint(GeometryCodingParams const & params)
{
return params.GetBasePoint();
}
} // namespace pts
/// Converts |points| to integer coordinates according to |params| and encodes them
/// into |deltas| with the encoder |fn|.
/// @precondition |deltas| is empty.
void Encode(EncodeFunT fn, std::vector<m2::PointD> const & points, GeometryCodingParams const & params,
            DeltasT & deltas)
{
  size_t const count = points.size();

  pts::PointsU upoints;
  upoints.reserve(count);

  auto const coordBits = params.GetCoordBits();
  for (m2::PointD const & pt : points)
    upoints.push_back(pts::D2U(pt, coordBits));

  ASSERT(deltas.empty(), ());
  // Room for one delta per point; the encoder writes through the adapter.
  deltas.resize(count);

  coding::OutDeltasT adapt(deltas);
  (*fn)(make_read_adapter(upoints), pts::GetBasePoint(params), pts::GetMaxPoint(params), adapt);
}
/// Decodes |deltas| with |fn| into |points| (OutPointsT overload); forwards to DecodeImpl.
void Decode(DecodeFunT fn, DeltasT const & deltas, GeometryCodingParams const & params, OutPointsT & points,
size_t reserveF)
{
DecodeImpl(fn, deltas, params, points, reserveF);
}
/// Decodes |deltas| with |fn| into |points| (std::vector overload); forwards to DecodeImpl.
void Decode(DecodeFunT fn, DeltasT const & deltas, GeometryCodingParams const & params,
std::vector<m2::PointD> & points, size_t reserveF)
{
DecodeImpl(fn, deltas, params, points, reserveF);
}
/// Reads |count| variable-length uint64 deltas from the memory starting at |pBeg| and
/// decodes them with |fn| into |points|.
/// @return the pointer returned by ReadVarUint64Array - presumably the position right
/// after the consumed bytes; confirm against its declaration.
void const * LoadInner(DecodeFunT fn, void const * pBeg, size_t count, GeometryCodingParams const & params,
OutPointsT & points)
{
DeltasT deltas;
deltas.reserve(count);
void const * ret = ReadVarUint64Array(static_cast<char const *>(pBeg), count, base::MakeBackInsertFunctor(deltas));
Decode(fn, deltas, params, points);
return ret;
}
/// Remembers the base point and the maximum point derived from |params|.
TrianglesChainSaver::TrianglesChainSaver(GeometryCodingParams const & params)
{
m_base = pts::GetBasePoint(params);
m_max = pts::GetMaxPoint(params);
}
// Serializes one chain of triangles into a new buffer appended to m_buffers.
// |arr| holds the three points of the first triangle; |edges| (taken by value, sorted here with
// edge_less_p0) describes the remaining elements of the chain.
// Layout written to the buffer: delta(arr[0], m_base), delta(arr[1], arr[0]), then for every
// processed edge its m_delta shifted left by 2 with two "tree structure" bits in the low bits.
// NOTE(review): edges.front() is used unconditionally, so |edges| is presumably never empty - confirm
// against the caller.
void TrianglesChainSaver::operator()(TPoint arr[3], std::vector<TEdge> edges)
{
m_buffers.push_back(TBuffer());
MemWriter<TBuffer> writer(m_buffers.back());
WriteVarUint(writer, coding::EncodePointDeltaAsUint(arr[0], m_base));
WriteVarUint(writer, coding::EncodePointDeltaAsUint(arr[1], arr[0]));
// The first edge is copied before sorting; its delta is replaced by the delta of the third point.
TEdge curr = edges.front();
curr.m_delta = coding::EncodePointDeltaAsUint(arr[2], arr[1]);
sort(edges.begin(), edges.end(), edge_less_p0());
// Second children that still have to be processed after the current branch ends.
std::stack<TEdge> st;
while (true)
{
// Two top bits must be free because the delta is shifted left by 2 below.
CHECK_EQUAL(curr.m_delta >> 62, 0, ());
uint64_t delta = curr.m_delta << 2;
// Find the edges that start at the end node of the current edge.
int const nextNode = curr.m_p[1];
auto i = lower_bound(edges.begin(), edges.end(), nextNode, edge_less_p0());
bool const found = (i != edges.end() && i->m_p[0] == nextNode);
if (found)
{
// Fill the 2 tree-structure bits.
ASSERT_NOT_EQUAL(i->m_side, -1, ());
uint64_t const one = 1;
// first child
delta |= (one << i->m_side);
std::vector<TEdge>::iterator j = i + 1;
if (j != edges.end() && j->m_p[0] == nextNode)
{
// second child
ASSERT_EQUAL(i->m_side, 0, ());
ASSERT_EQUAL(j->m_side, 1, ());
delta |= (one << j->m_side);
// push to stack for further processing
st.push(*j);
}
curr = *i;
}
// Write the delta (with its tree bits) for the current element.
WriteVarUint(writer, delta);
if (!found)
{
// End of chain - continue with a postponed edge from the stack or exit.
if (st.empty())
break;
else
{
curr = st.top();
st.pop();
}
}
}
}
// Decodes one chain of triangles from |deltas| and appends the triangle points to |points|
// (three points per triangle).
// deltas[0] and deltas[1] are plain point deltas; starting from deltas[2] the two low bits of every
// value are "tree bits" that select the common edge for the next triangle, and the remaining
// bits (value >> 2) are the point delta.
// At least three deltas are expected (checked by ASSERT_GREATER in debug builds only).
void DecodeTriangles(coding::InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
coding::OutPointsT & points)
{
size_t const count = deltas.size();
ASSERT_GREATER(count, 2, ());
m2::PointD const maxPointD(maxPoint);
// First triangle: each point is predicted by the previous one (the first by |basePoint|).
points.push_back(coding::DecodePointDeltaFromUint(deltas[0], basePoint));
points.push_back(coding::DecodePointDeltaFromUint(deltas[1], points.back()));
points.push_back(coding::DecodePointDeltaFromUint(deltas[2] >> 2, points.back()));
// Indices (of the last point of a triangle) to return to when the current branch ends.
std::stack<size_t> st;
// Index in |points| of the last point of the current triangle.
size_t ind = 2;
uint8_t treeBits = deltas[2] & 3;
// Note: |i| is advanced only when a new triangle is emitted, not when the stack is popped.
for (size_t i = 3; i < count;)
{
// points 0, 1 - is a common edge
// point 2 - is an opposite point for new triangle to calculate prediction
size_t trg[3];
if (treeBits & 1)
{
// common edge is 1->2
trg[0] = ind;
trg[1] = ind - 1;
trg[2] = ind - 2;
// push to stack for further processing
if (treeBits & 2)
st.push(ind);
}
else if (treeBits & 2)
{
// common edge is 2->0
trg[0] = ind - 2;
trg[1] = ind;
trg[2] = ind - 1;
}
else
{
// end of chain - pop current from stack
ASSERT(!st.empty(), ());
ind = st.top();
st.pop();
// Only the second branch of the popped triangle is left to process.
treeBits = 2;
continue;
}
// push points
points.push_back(points[trg[0]]);
points.push_back(points[trg[1]]);
points.push_back(coding::DecodePointDeltaFromUint(
deltas[i] >> 2, coding::PredictPointInTriangle(maxPointD, points[trg[0]], points[trg[1]], points[trg[2]])));
// next step
treeBits = deltas[i] & 3;
ind = points.size() - 1;
++i;
}
// A well-formed chain ends with no pending branches.
ASSERT(treeBits == 0 && st.empty(), ());
}
} // namespace serial

View file

@ -0,0 +1,356 @@
#pragma once
#include "geometry/point2d.hpp"
#include "coding/point_coding.hpp"
#include "coding/tesselator_decl.hpp"
#include "coding/varint.hpp"
#include "coding/writer.hpp"
#include "base/array_adapters.hpp"
#include "base/assert.hpp"
#include "base/buffer_vector.hpp"
#include "base/stl_helpers.hpp"
#include <algorithm>
#include <functional>
#include <list>
#include <vector>
namespace coding
{
using InPointsT = array_read<m2::PointU>;
using InDeltasT = array_read<uint64_t>;
using OutPointsT = array_write<m2::PointU>;
using OutDeltasT = array_write<uint64_t>;
// Stores the difference of two points to a single unsigned 64-bit integer.
// It is not recommended to use this function: consider EncodePointDelta instead.
uint64_t EncodePointDeltaAsUint(m2::PointU const & actual, m2::PointU const & prediction);
m2::PointU DecodePointDeltaFromUint(uint64_t delta, m2::PointU const & prediction);
// Writes the difference of two 2d vectors to sink.
/// Writes the per-coordinate signed difference (|next| - |curr|) to |sink| as two varints:
/// first the x difference, then the y difference.
template <typename Sink>
void EncodePointDelta(Sink & sink, m2::PointU const & curr, m2::PointU const & next)
{
  // Both operands are converted with asserted_cast before subtracting.
  auto const signedDiff = [](auto from, auto to)
  { return base::asserted_cast<int32_t>(to) - base::asserted_cast<int32_t>(from); };

  auto const dx = signedDiff(curr.x, next.x);
  auto const dy = signedDiff(curr.y, next.y);
  WriteVarInt(sink, dx);
  WriteVarInt(sink, dy);
}
// Reads the encoded difference from |source| and returns the
// point equal to |base| + difference.
// Reads two varints (dx, then dy) from |source| and returns |base| shifted by them.
// In debug builds asserts that neither resulting coordinate is negative.
template <typename Source>
m2::PointU DecodePointDelta(Source & source, m2::PointU const & base)
{
auto const dx = ReadVarInt<int32_t>(source);
auto const dy = ReadVarInt<int32_t>(source);
ASSERT(int(base.x) + dx >= 0 && int(base.y) + dy >= 0, (base, dx, dy));
return m2::PointU(base.x + dx, base.y + dy);
}
/// Predict next point for polyline with given previous points (p1, p2).
m2::PointU PredictPointInPolyline(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2);
/// Predict next point for polyline with given previous points (p1, p2, p3).
m2::PointU PredictPointInPolyline(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2,
m2::PointU const & p3);
/// Predict point for neighbour triangle with given
/// previous triangle (p1, p2, p3) and common edge (p1, p2).
m2::PointU PredictPointInTriangle(m2::PointD const & maxPoint, m2::PointU const & p1, m2::PointU const & p2,
m2::PointU const & p3);
void EncodePolylinePrev1(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutDeltasT & deltas);
void DecodePolylinePrev1(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutPointsT & points);
void EncodePolylinePrev2(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutDeltasT & deltas);
void DecodePolylinePrev2(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutPointsT & points);
void EncodePolylinePrev3(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutDeltasT & deltas);
void DecodePolylinePrev3(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutPointsT & points);
void EncodePolyline(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutDeltasT & deltas);
void DecodePolyline(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutPointsT & points);
void EncodeTriangleStrip(InPointsT const & points, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutDeltasT & deltas);
void DecodeTriangleStrip(InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
OutPointsT & points);
} // namespace coding
namespace serial
{
// Parameters of geometry coding: the number of bits per coordinate and the base point,
// which is kept both as a point and as a single 64-bit integer.
class GeometryCodingParams
{
public:
GeometryCodingParams();
GeometryCodingParams(uint8_t coordBits, m2::PointD const & pt);
GeometryCodingParams(uint8_t coordBits, uint64_t basePointUint64);
m2::PointU GetBasePoint() const { return m_BasePoint; }
uint64_t GetBasePointUint64() const { return m_BasePointUint64; }
// Same value as GetBasePointUint64(), reinterpreted as signed.
int64_t GetBasePointInt64() const { return static_cast<int64_t>(m_BasePointUint64); }
void SetBasePoint(m2::PointD const & pt);
uint8_t GetCoordBits() const { return m_CoordBits; }
// Writes the coord bits followed by the 64-bit base point, both as varuints.
template <typename WriterT>
void Save(WriterT & writer) const
{
WriteVarUint(writer, GetCoordBits());
WriteVarUint(writer, m_BasePointUint64);
}
// Reads the values in the order written by Save() and re-creates the object from them.
// The coord bits value is validated with a debug-only assert.
template <typename SourceT>
void Load(SourceT & src)
{
uint32_t const coordBits = ReadVarUint<uint32_t>(src);
ASSERT_LESS(coordBits, 32, ());
*this = GeometryCodingParams(coordBits, ReadVarUint<uint64_t>(src));
}
private:
uint64_t m_BasePointUint64;
m2::PointU m_BasePoint;
uint8_t m_CoordBits;
};
namespace pts
{
using PointsU = buffer_vector<m2::PointU, 32>;
m2::PointU D2U(m2::PointD const & p, uint32_t coordBits);
m2::PointD U2D(m2::PointU const & p, uint32_t coordBits);
m2::PointU GetMaxPoint(GeometryCodingParams const & params);
m2::PointU GetBasePoint(GeometryCodingParams const & params);
} // namespace pts
/// @name Encode and Decode function types.
typedef void (*EncodeFunT)(coding::InPointsT const &, m2::PointU const &, m2::PointU const &, coding::OutDeltasT &);
typedef void (*DecodeFunT)(coding::InDeltasT const &, m2::PointU const &, m2::PointU const &, coding::OutPointsT &);
using DeltasT = buffer_vector<uint64_t, 32>;
using OutPointsT = buffer_vector<m2::PointD, 32>;
void Encode(EncodeFunT fn, std::vector<m2::PointD> const & points, GeometryCodingParams const & params,
DeltasT & deltas);
/// @name Overloads for different out container types.
void Decode(DecodeFunT fn, DeltasT const & deltas, GeometryCodingParams const & params, OutPointsT & points,
size_t reserveF = 1);
void Decode(DecodeFunT fn, DeltasT const & deltas, GeometryCodingParams const & params,
std::vector<m2::PointD> & points, size_t reserveF = 1);
template <class TDecodeFun, class TOutPoints>
void DecodeImpl(TDecodeFun fn, DeltasT const & deltas, GeometryCodingParams const & params, TOutPoints & points,
size_t reserveF)
{
size_t const count = deltas.size() * reserveF;
pts::PointsU upoints;
upoints.resize(count);
coding::OutPointsT adapt(upoints);
(*fn)(make_read_adapter(deltas), pts::GetBasePoint(params), pts::GetMaxPoint(params), adapt);
if (points.size() < 2)
{
// Do not call reserve when loading triangles - they are accumulated to one vector.
points.reserve(count);
}
std::transform(upoints.begin(), upoints.begin() + adapt.size(), std::back_inserter(points),
std::bind(&pts::U2D, std::placeholders::_1, params.GetCoordBits()));
}
template <class TSink>
void SavePoint(TSink & sink, m2::PointD const & pt, GeometryCodingParams const & cp)
{
WriteVarUint(sink, coding::EncodePointDeltaAsUint(PointDToPointU(pt, cp.GetCoordBits()), cp.GetBasePoint()));
}
/// Reads a varuint delta from |src|, applies it to the base point of |cp| and converts
/// the result back to a floating point point.
template <class TSource>
m2::PointD LoadPoint(TSource & src, GeometryCodingParams const & cp)
{
  uint64_t const delta = ReadVarUint<uint64_t>(src);
  m2::PointU const quantized = coding::DecodePointDeltaFromUint(delta, cp.GetBasePoint());
  return PointUToPointD(quantized, cp.GetCoordBits());
}
// Encodes |points| with |fn| and writes the resulting deltas to |sink| with WriteVarUintArray.
// Unlike SaveOuter, nothing is written before the deltas themselves.
template <class TSink>
void SaveInner(EncodeFunT fn, std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
{
DeltasT deltas;
Encode(fn, points, params, deltas);
WriteVarUintArray(deltas, sink);
}
/// Writes |buffer| to |sink| as a varuint byte count followed by the raw bytes.
/// The size is narrowed to uint32_t before writing.
template <class TSink>
void WriteBufferToSink(std::vector<char> const & buffer, TSink & sink)
{
  uint32_t const count = static_cast<uint32_t>(buffer.size());
  WriteVarUint(sink, count);
  // data() is well-defined for an empty vector, whereas &buffer[0] is undefined behavior.
  sink.Write(buffer.data(), count);
}
// Encodes |points| with |fn|, serializes the deltas into a temporary memory buffer and
// writes that buffer to |sink| prefixed with its size (see WriteBufferToSink).
template <class TSink>
void SaveOuter(EncodeFunT fn, std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
{
DeltasT deltas;
Encode(fn, points, params, deltas);
std::vector<char> buffer;
MemWriter<std::vector<char>> writer(buffer);
WriteVarUintArray(deltas, writer);
WriteBufferToSink(buffer, sink);
}
void const * LoadInner(DecodeFunT fn, void const * pBeg, size_t count, GeometryCodingParams const & params,
OutPointsT & points);
/// Reads a size-prefixed block of varuint deltas from |src| (the layout produced by SaveOuter),
/// decodes it with |fn| and appends the points to |points|.
/// |reserveF| is forwarded to Decode.
template <class TSource, class TPoints>
void LoadOuter(DecodeFunT fn, TSource & src, GeometryCodingParams const & params, TPoints & points, size_t reserveF = 1)
{
  uint32_t const count = ReadVarUint<uint32_t>(src);
  std::vector<char> buffer(count);
  // data() is well-defined for an empty buffer (count == 0), whereas &buffer[0] is undefined behavior.
  char * p = buffer.data();
  src.Read(p, count);

  DeltasT deltas;
  // Rough estimate of the number of deltas; it only affects the reserved capacity.
  deltas.reserve(count / 2);
  ReadVarUint64Array(p, p + count, base::MakeBackInsertFunctor(deltas));
  Decode(fn, deltas, params, points, reserveF);
}
/// @name Paths.
// Saves a path with SaveInner using the polyline encoder.
template <class TSink>
void SaveInnerPath(std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
{
SaveInner(&coding::EncodePolyline, points, params, sink);
}
// Saves a path with SaveOuter (size-prefixed block) using the polyline encoder.
template <class TSink>
void SaveOuterPath(std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
{
SaveOuter(&coding::EncodePolyline, points, params, sink);
}
// Loads |count| deltas of a path from |pBeg| with the polyline decoder.
// Returns whatever LoadInner returns for the consumed input.
inline void const * LoadInnerPath(void const * pBeg, size_t count, GeometryCodingParams const & params,
OutPointsT & points)
{
return LoadInner(&coding::DecodePolyline, pBeg, count, params, points);
}
// Loads a size-prefixed path block from |src| with the polyline decoder (see LoadOuter).
template <class TSource, class TPoints>
void LoadOuterPath(TSource & src, GeometryCodingParams const & params, TPoints & points)
{
LoadOuter(&coding::DecodePolyline, src, params, points);
}
/// @name Triangles.
// Saves |points| with SaveInner using the triangle strip encoder.
template <class TSink>
void SaveInnerTriangles(std::vector<m2::PointD> const & points, GeometryCodingParams const & params, TSink & sink)
{
SaveInner(&coding::EncodeTriangleStrip, points, params, sink);
}
/// Expands the first |count| points of the triangle strip |strip| into separate triangles:
/// |triangles| is cleared and filled with (strip[i], strip[i + 1], strip[i + 2]) for every i.
/// |count| must be at least 2.
inline void StripToTriangles(size_t count, OutPointsT const & strip, OutPointsT & triangles)
{
  CHECK_GREATER_OR_EQUAL(count, 2, ());

  size_t const trianglesCount = count - 2;
  triangles.clear();
  triangles.reserve(trianglesCount * 3);

  for (size_t first = 0; first < trianglesCount; ++first)
  {
    for (size_t offset = 0; offset < 3; ++offset)
      triangles.push_back(strip[first + offset]);
  }
}
// Loads a triangle strip of |count| points from |pBeg| and expands it into separate
// triangles stored in |triangles| (previous content of |triangles| is dropped by StripToTriangles).
// Returns whatever LoadInner returns for the consumed input.
inline void const * LoadInnerTriangles(void const * pBeg, size_t count, GeometryCodingParams const & params,
OutPointsT & triangles)
{
CHECK_GREATER_OR_EQUAL(count, 2, ());
OutPointsT points;
void const * res = LoadInner(&coding::DecodeTriangleStrip, pBeg, count, params, points);
StripToTriangles(count, points, triangles);
return res;
}
void DecodeTriangles(coding::InDeltasT const & deltas, m2::PointU const & basePoint, m2::PointU const & maxPoint,
coding::OutPointsT & triangles);
// Reads the number of triangle chains from |src| and then loads every chain with LoadOuter,
// accumulating all triangles in |triangles|. The reserve factor 3 is passed to LoadOuter.
template <class TSource>
void LoadOuterTriangles(TSource & src, GeometryCodingParams const & params, OutPointsT & triangles)
{
uint32_t const count = ReadVarUint<uint32_t>(src);
for (uint32_t i = 0; i < count; ++i)
LoadOuter(&DecodeTriangles, src, params, triangles, 3);
}
/// Accumulates serialized triangle chains (one buffer per operator() call) and
/// writes all of them to a sink with Save().
class TrianglesChainSaver
{
  using TPoint = m2::PointU;
  using TEdge = tesselator::Edge;
  using TBuffer = std::vector<char>;

  TPoint m_base;
  TPoint m_max;
  std::list<TBuffer> m_buffers;

public:
  explicit TrianglesChainSaver(GeometryCodingParams const & params);

  TPoint GetBasePoint() const { return m_base; }
  TPoint GetMaxPoint() const { return m_max; }

  /// Serializes one chain into a new buffer; defined in the .cpp file.
  void operator()(TPoint arr[3], std::vector<TEdge> edges);

  /// Total size in bytes of all accumulated chain buffers.
  size_t GetBufferSize() const
  {
    size_t total = 0;
    for (TBuffer const & chain : m_buffers)
      total += chain.size();
    return total;
  }

  /// Writes the number of chains followed by every chain buffer (size-prefixed) to |sink|.
  template <class TSink>
  void Save(TSink & sink)
  {
    // Not necessary assumption that 3-bytes varuint
    // is enough for triangle chains count.
    size_t const count = m_buffers.size();
    CHECK_LESS_OR_EQUAL(count, 0x1FFFFF, ());
    WriteVarUint(sink, static_cast<uint32_t>(count));

    for (TBuffer const & chain : m_buffers)
      WriteBufferToSink<TSink>(chain, sink);
  }
};
} // namespace serial

48
libs/coding/hex.cpp Normal file
View file

@ -0,0 +1,48 @@
#include "coding/hex.hpp"
#include "base/assert.hpp"
namespace impl
{
static char constexpr kToHexTable[] = "0123456789ABCDEF";

/// Writes the uppercase hexadecimal representation of |size| bytes from |src| to |dst|.
/// |dst| must have room for 2 * |size| characters; no terminating zero is written.
void ToHexRaw(void const * src, size_t size, void * dst)
{
  auto const * in = static_cast<uint8_t const *>(src);
  auto * hex = static_cast<uint8_t *>(dst);
  for (size_t i = 0; i < size; ++i)
  {
    // High nibble first, then the low one.
    hex[2 * i] = kToHexTable[in[i] >> 4];
    hex[2 * i + 1] = kToHexTable[in[i] & 0xF];
  }
}
/// Converts one hexadecimal digit character (0-9, A-F, a-f) to its numeric value.
/// Any other character triggers a debug assert; 0 is returned for it.
uint8_t HexDigitToRaw(uint8_t const digit)
{
  bool const isDecimal = digit >= '0' && digit <= '9';
  if (isDecimal)
    return (digit - '0');

  bool const isUpper = digit >= 'A' && digit <= 'F';
  if (isUpper)
    return (digit - 'A' + 10);

  bool const isLower = digit >= 'a' && digit <= 'f';
  if (isLower)
    return (digit - 'a' + 10);

  ASSERT(false, (digit));
  return 0;
}
/// Decodes |size| hexadecimal characters from |src| into |size| / 2 bytes written to |dst|.
/// Only complete pairs of digits are decoded: if |size| is odd, the last character is ignored.
/// (The previous loop read one byte past the input and wrote one byte past the output in that case.)
void FromHexRaw(void const * src, size_t size, void * dst)
{
  uint8_t const * ptr = static_cast<uint8_t const *>(src);
  uint8_t const * const end = ptr + (size / 2) * 2;
  uint8_t * out = static_cast<uint8_t *>(dst);
  while (ptr != end)
  {
    // The first digit of a pair is the high nibble.
    uint8_t const high = HexDigitToRaw(*ptr++);
    uint8_t const low = HexDigitToRaw(*ptr++);
    *out++ = static_cast<uint8_t>((high << 4) | low);
  }
}
} // namespace impl

106
libs/coding/hex.hpp Normal file
View file

@ -0,0 +1,106 @@
#pragma once
#include "base/base.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#include <type_traits>
namespace impl
{
void ToHexRaw(void const * src, size_t size, void * dst);
void FromHexRaw(void const * src, size_t size, void * dst);
} // namespace impl
/// Returns the hexadecimal representation (2 characters per byte) of |size| bytes at |ptr|.
/// An empty string is returned for |size| == 0.
inline std::string ToHex(void const * ptr, size_t size)
{
  std::string hex;
  if (size != 0)
  {
    hex.resize(size * 2);
    ::impl::ToHexRaw(ptr, size, &hex[0]);
  }
  return hex;
}
// Returns the hexadecimal representation of the bytes stored in |container|.
// The element type must be one byte wide (enforced by static_assert). The address of the first
// element is passed on as the start of a byte range, so the storage is presumably expected
// to be contiguous - confirm for non-standard containers.
template <typename ContainerT>
inline std::string ToHex(ContainerT const & container)
{
static_assert(sizeof(*container.begin()) == 1, "");
if (container.empty())
return {};
return ToHex(&*container.begin(), container.end() - container.begin());
}
/// Conversion with specializations to avoid warnings
/// @{
/// Returns the hexadecimal representation of |n|, most significant byte first
/// (2 characters per byte of IntT).
template <typename IntT>
inline std::string NumToHex(IntT n)
{
  static_assert(std::is_integral<IntT>::value, "");
  static_assert(sizeof(IntT) <= sizeof(uint64_t), "");

  // Bytes are extracted from an unsigned copy: the previous implementation shifted |n| itself
  // to the left, which is undefined behavior for negative signed values before C++20.
  // The conversion sign-extends negative values, so the low sizeof(n) bytes are unchanged.
  uint64_t const bits = static_cast<uint64_t>(n);

  uint8_t buf[sizeof(n)];
  for (size_t i = 0; i < sizeof(n); ++i)
    buf[i] = static_cast<uint8_t>(bits >> ((sizeof(n) - 1 - i) * 8));
  return ToHex(buf, sizeof(buf));
}
// Single-byte specialization: the byte is converted directly, without the shifting loop.
template <>
inline std::string NumToHex<int8_t>(int8_t c)
{
return ToHex(&c, sizeof(c));
}
// Single-byte specialization: the byte is converted directly, without the shifting loop.
template <>
inline std::string NumToHex<uint8_t>(uint8_t c)
{
return ToHex(&c, sizeof(c));
}
// Single-byte specialization: the byte is converted directly, without the shifting loop.
template <>
inline std::string NumToHex<char>(char c)
{
return ToHex(&c, sizeof(c));
}
/// @}
/// Decodes the hexadecimal string |s| into raw bytes (2 characters per byte).
/// If |s| has an odd length, its last character is ignored.
inline std::string FromHex(std::string_view s)
{
  std::string result;
  result.resize(s.size() / 2);
  // Pass exactly the number of characters that fit into |result|. Passing s.size() for an
  // odd-length input asked the decoder to produce one byte more than was allocated.
  ::impl::FromHexRaw(s.data(), result.size() * 2, &result[0]);
  return result;
}
/// Returns the base-4 representation of |n|: four digits '0'..'3', most significant pair of bits first.
inline std::string ByteToQuat(uint8_t n)
{
  std::string quat(4, '0');
  for (size_t pos = 0; pos < quat.size(); ++pos)
  {
    // Digit |pos| is made of bits [7 - 2 * pos, 6 - 2 * pos].
    unsigned const shift = 6 - 2 * static_cast<unsigned>(pos);
    quat[pos] = static_cast<char>(((n >> shift) & 0x3) + '0');
  }
  return quat;
}
/// Returns the base-4 representation of |n|, most significant byte first
/// (4 digits per byte of IntT, see ByteToQuat).
template <typename IntT>
inline std::string NumToQuat(IntT n)
{
  static_assert(sizeof(IntT) <= sizeof(uint64_t), "");

  // Bytes are extracted from an unsigned copy: the previous implementation shifted |n| itself
  // to the left, which is undefined behavior for negative signed values before C++20.
  // The conversion sign-extends negative values, so the low sizeof(n) bytes are unchanged.
  uint64_t const bits = static_cast<uint64_t>(n);

  std::string result;
  for (size_t i = 0; i < sizeof(n); ++i)
  {
    uint8_t const ub = static_cast<uint8_t>(bits >> ((sizeof(n) - 1 - i) * 8));
    result += ByteToQuat(ub);
  }
  return result;
}

112
libs/coding/huffman.cpp Normal file
View file

@ -0,0 +1,112 @@
#include "coding/huffman.hpp"
#include "base/logging.hpp"
#include <queue>
#include <utility>
namespace coding
{
// Frees all nodes of the Huffman tree.
HuffmanCoder::~HuffmanCoder()
{
DeleteHuffmanTree(m_root);
}
/// Looks up the code of |symbol| in the encoder table.
/// @return false (leaving |code| untouched) if the symbol is unknown.
bool HuffmanCoder::Encode(uint32_t symbol, Code & code) const
{
  auto const entry = m_encoderTable.find(symbol);
  bool const known = entry != m_encoderTable.end();
  if (known)
    code = entry->second;
  return known;
}
/// Looks up the symbol of |code| in the decoder table.
/// @return false (leaving |symbol| untouched) if the code is unknown.
bool HuffmanCoder::Decode(Code const & code, uint32_t & symbol) const
{
  auto const entry = m_decoderTable.find(code);
  bool const known = entry != m_decoderTable.end();
  if (known)
    symbol = entry->second;
  return known;
}
/// Recursively fills the encoder and decoder tables for the subtree rooted at |root|.
/// |path| holds the bits of the way from the tree root to |root|: going to the right child
/// of a node sets the bit number node->depth, going to the left one leaves it zero.
void HuffmanCoder::BuildTables(Node * root, uint32_t path)
{
  if (!root)
    return;

  if (!root->isLeaf)
  {
    BuildTables(root->l, path);
    BuildTables(root->r, path + (static_cast<uint32_t>(1) << root->depth));
    return;
  }

  // A leaf: its depth is the length of the code.
  Code const leafCode(path, root->depth);
  m_encoderTable[root->symbol] = leafCode;
  m_decoderTable[leafCode] = root->symbol;
}
// Resets the coder to the empty state: frees the tree and clears both tables.
void HuffmanCoder::Clear()
{
DeleteHuffmanTree(m_root);
// Reset the pointer so that the destructor or a later Clear() does not free the tree twice.
m_root = nullptr;
m_encoderTable.clear();
m_decoderTable.clear();
}
// Recursively deletes the subtree rooted at |root| (children first). A null |root| is a no-op.
// The caller is responsible for resetting its own pointer to the deleted subtree.
void HuffmanCoder::DeleteHuffmanTree(Node * root)
{
if (!root)
return;
DeleteHuffmanTree(root->l);
DeleteHuffmanTree(root->r);
delete root;
}
// Builds the Huffman tree for the frequency table |freqs| and stores its root in m_root.
// An empty table yields a null root. Does not free a previously built tree.
void HuffmanCoder::BuildHuffmanTree(Freqs const & freqs)
{
// The queue is ordered by NodeComparator; top() is the next node to merge.
std::priority_queue<Node *, std::vector<Node *>, NodeComparator> pq;
for (auto const & e : freqs.GetTable())
pq.push(new Node(e.first, e.second, true /* isLeaf */));
if (pq.empty())
{
m_root = nullptr;
return;
}
// Repeatedly merge the two top nodes until a single root is left.
while (pq.size() > 1)
{
auto a = pq.top();
pq.pop();
auto b = pq.top();
pq.pop();
// The node with the smaller symbol becomes the left child.
if (a->symbol > b->symbol)
std::swap(a, b);
// Give it the smaller symbol to make the resulting encoding more predictable.
auto ab = new Node(a->symbol, a->freq + b->freq, false /* isLeaf */);
ab->l = a;
ab->r = b;
pq.push(ab);
}
m_root = pq.top();
pq.pop();
SetDepths(m_root, 0 /* depth */);
}
// Recursively assigns |depth| to |root| and depth + 1 to its children.
// Fails with a CHECK if a node is deeper than kMaxDepth.
void HuffmanCoder::SetDepths(Node * root, uint32_t depth)
{
// One would need more than 2^32 symbols to build a code that long.
// On the other hand, 32 is short enough for our purposes, so do not
// try to shrink the trees beyond this threshold.
uint32_t constexpr kMaxDepth = 32;
if (!root)
return;
CHECK_LESS_OR_EQUAL(depth, kMaxDepth, ());
root->depth = depth;
SetDepths(root->l, depth + 1);
SetDepths(root->r, depth + 1);
}
} // namespace coding

312
libs/coding/huffman.hpp Normal file
View file

@ -0,0 +1,312 @@
#pragma once
#include "coding/bit_streams.hpp"
#include "coding/varint.hpp"
#include "base/assert.hpp"
#include "base/checked_cast.hpp"
#include "base/string_utils.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <memory>
#include <type_traits>
#include <vector>
namespace coding
{
class HuffmanCoder
{
public:
class Freqs
{
public:
using Table = std::map<uint32_t, uint32_t>;
Freqs() = default;
template <typename... Args>
Freqs(Args const &... args)
{
Add(args...);
}
void Add(strings::UniString const & s) { Add(s.begin(), s.end()); }
void Add(std::string const & s) { Add(s.begin(), s.end()); }
template <typename T>
void Add(T const * begin, T const * const end)
{
static_assert(std::is_integral<T>::value, "");
AddImpl(begin, end);
}
template <typename It>
void Add(It begin, It const end)
{
static_assert(std::is_integral<typename It::value_type>::value, "");
AddImpl(begin, end);
}
template <typename T>
void Add(std::vector<T> const & v)
{
for (auto const & e : v)
Add(std::begin(e), std::end(e));
}
Table const & GetTable() const { return m_table; }
private:
template <typename It>
void AddImpl(It begin, It const end)
{
static_assert(sizeof(*begin) <= 4, "");
for (; begin != end; ++begin)
++m_table[static_cast<uint32_t>(*begin)];
}
Table m_table;
};
// A Code encodes a path to a leaf. It is read starting from
// the least significant bit.
struct Code
{
uint32_t bits;
size_t len;
Code() : bits(0), len(0) {}
Code(uint32_t bits, size_t len) : bits(bits), len(len) {}
bool operator<(Code const & o) const
{
if (bits != o.bits)
return bits < o.bits;
return len < o.len;
}
};
HuffmanCoder() : m_root(nullptr) {}
~HuffmanCoder();
// Internally builds a Huffman tree and makes
// the EncodeAndWrite and ReadAndDecode methods available.
template <typename... Args>
void Init(Args const &... args)
{
Clear();
BuildHuffmanTree(Freqs(args...));
BuildTables(m_root, 0);
}
void Clear();
// One way to store the encoding would be
// -- the succinct representation of the topology of Huffman tree;
// -- the list of symbols that are stored in the leaves, as varuints in delta encoding.
// This would probably be an overkill.
template <typename TWriter>
void WriteEncoding(TWriter & writer)
{
// @todo Do not waste space, use BitWriter.
WriteVarUint(writer, m_decoderTable.size());
for (auto const & kv : m_decoderTable)
{
WriteVarUint(writer, kv.first.bits);
WriteVarUint(writer, kv.first.len);
WriteVarUint(writer, kv.second);
}
}
template <typename TSource>
void ReadEncoding(TSource & src)
{
DeleteHuffmanTree(m_root);
m_root = new Node(0 /* symbol */, 0 /* freq */, false /* isLeaf */);
m_encoderTable.clear();
m_decoderTable.clear();
size_t sz = static_cast<size_t>(ReadVarUint<uint32_t, TSource>(src));
for (size_t i = 0; i < sz; ++i)
{
uint32_t bits = ReadVarUint<uint32_t, TSource>(src);
uint32_t len = ReadVarUint<uint32_t, TSource>(src);
uint32_t symbol = ReadVarUint<uint32_t, TSource>(src);
Code code(bits, len);
m_encoderTable[symbol] = code;
m_decoderTable[code] = symbol;
Node * cur = m_root;
for (size_t j = 0; j < len; ++j)
{
if (((bits >> j) & 1) == 0)
{
if (!cur->l)
cur->l = new Node(0 /* symbol */, 0 /* freq */, false /* isLeaf */);
cur = cur->l;
}
else
{
if (!cur->r)
cur->r = new Node(0 /* symbol */, 0 /* freq */, false /* isLeaf */);
cur = cur->r;
}
cur->depth = j + 1;
}
cur->isLeaf = true;
cur->symbol = symbol;
}
}
bool Encode(uint32_t symbol, Code & code) const;
bool Decode(Code const & code, uint32_t & symbol) const;
template <typename TWriter, typename T>
uint32_t EncodeAndWrite(TWriter & writer, T const * begin, T const * end) const
{
static_assert(std::is_integral<T>::value, "");
return EncodeAndWriteImpl(writer, begin, end);
}
template <typename TWriter, typename It>
uint32_t EncodeAndWrite(TWriter & writer, It begin, It end) const
{
static_assert(std::is_integral<typename It::value_type>::value, "");
return EncodeAndWriteImpl(writer, begin, end);
}
template <typename TWriter>
uint32_t EncodeAndWrite(TWriter & writer, std::string const & s) const
{
return EncodeAndWrite(writer, s.begin(), s.end());
}
// Returns the number of bits written AFTER the size, i.e. the number
// of bits that the encoded string consists of.
template <typename TWriter>
uint32_t EncodeAndWrite(TWriter & writer, strings::UniString const & s) const
{
return EncodeAndWrite(writer, s.begin(), s.end());
}
template <typename TSource, typename OutIt>
OutIt ReadAndDecode(TSource & src, OutIt out) const
{
BitReader<TSource> bitReader(src);
size_t sz = static_cast<size_t>(ReadVarUint<uint32_t, TSource>(src));
for (size_t i = 0; i < sz; ++i)
*out++ = ReadAndDecode(bitReader);
return out;
}
template <typename TSource>
strings::UniString ReadAndDecode(TSource & src) const
{
strings::UniString result;
ReadAndDecode(src, std::back_inserter(result));
return result;
}
private:
struct Node
{
Node *l, *r;
uint32_t symbol;
uint32_t freq;
size_t depth;
bool isLeaf;
Node(uint32_t symbol, uint32_t freq, bool isLeaf)
: l(nullptr)
, r(nullptr)
, symbol(symbol)
, freq(freq)
, depth(0)
, isLeaf(isLeaf)
{}
};
struct NodeComparator
{
bool operator()(Node const * const a, Node const * const b) const
{
if (a->freq != b->freq)
return a->freq > b->freq;
return a->symbol > b->symbol;
}
};
// No need to clump the interface: keep private the methods
// that encode one symbol only.
template <typename TWriter>
size_t EncodeAndWrite(BitWriter<TWriter> & bitWriter, uint32_t symbol) const
{
Code code;
CHECK(Encode(symbol, code), ());
size_t fullBytes = code.len / CHAR_BIT;
size_t rem = code.len % CHAR_BIT;
for (size_t i = 0; i < fullBytes; ++i)
{
bitWriter.Write(code.bits & 0xFF, CHAR_BIT);
code.bits >>= CHAR_BIT;
}
bitWriter.Write(code.bits, rem);
return code.len;
}
template <typename TWriter, typename It>
uint32_t EncodeAndWriteImpl(TWriter & writer, It begin, It end) const
{
static_assert(sizeof(*begin) <= 4, "");
size_t const d = base::asserted_cast<size_t>(std::distance(begin, end));
BitWriter<TWriter> bitWriter(writer);
WriteVarUint(writer, d);
uint32_t sz = 0;
for (; begin != end; ++begin)
sz += EncodeAndWrite(bitWriter, static_cast<uint32_t>(*begin));
return sz;
}
template <typename TSource>
uint32_t ReadAndDecode(BitReader<TSource> & bitReader) const
{
Node * cur = m_root;
while (cur)
{
if (cur->isLeaf)
return cur->symbol;
uint8_t bit = bitReader.Read(1);
if (bit == 0)
cur = cur->l;
else
cur = cur->r;
}
CHECK(false, ("Could not decode a Huffman-encoded symbol."));
return 0;
}
// Converts a Huffman tree into the more convenient representation
// of encoding and decoding tables.
void BuildTables(Node * root, uint32_t path);
void DeleteHuffmanTree(Node * root);
void BuildHuffmanTree(Freqs const & freqs);
// Properly sets the depth field in the subtree rooted at root.
// It is easier to do it after the tree is built.
void SetDepths(Node * root, uint32_t depth);
Node * m_root;
std::map<Code, uint32_t> m_decoderTable;
std::map<uint32_t, Code> m_encoderTable;
};
} // namespace coding

View file

@ -0,0 +1,29 @@
#pragma once
#include "std/target_os.hpp"
#if defined(OMIM_OS_WINDOWS_NATIVE)
#define fseek64 _fseeki64
#define ftell64 _ftelli64
#elif defined(OMIM_OS_WINDOWS_MINGW)
#define fseek64 fseeko64
#define ftell64 ftello64
#else
// POSIX standard.
#include <sys/types.h>
// TODO: Always assert for 8 bytes after increasing min Android API to 24+.
// See more details here: https://android.googlesource.com/platform/bionic/+/master/docs/32-bit-abi.md
#if defined(OMIM_OS_ANDROID) && (defined(__arm__) || defined(__i386__))
static_assert(sizeof(off_t) == 4, "32-bit Android NDK < API 24 has only 32-bit file operations support");
#else
static_assert(sizeof(off_t) == 8, "FileReader and FileWriter require 64-bit file operations");
#endif
#define fseek64 fseeko
#define ftell64 ftello
#endif
#include <cstdio>

View file

@ -0,0 +1,339 @@
#include "coding/internal/file_data.hpp"
#include "coding/constants.hpp"
#include "coding/internal/file64_api.hpp"
#include "coding/reader.hpp" // For Reader exceptions.
#include "coding/writer.hpp" // For Writer exceptions.
#include "base/exception.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include "std/target_os.hpp"
#include <algorithm>
#include <cerrno>
#include <cstring>
#include <exception>
#include <fstream>
#include <vector>
#ifdef OMIM_OS_WINDOWS
#include <io.h>
#else
#include <unistd.h> // ftruncate
#endif
namespace base
{
using namespace std;
/// Prints the name of the file operation |op|; nothing is printed for an unknown value.
std::ostream & operator<<(std::ostream & stream, FileData::Op op)
{
  char const * name = nullptr;
  switch (op)
  {
  case FileData::Op::READ: name = "READ"; break;
  case FileData::Op::WRITE_TRUNCATE: name = "WRITE_TRUNCATE"; break;
  case FileData::Op::WRITE_EXISTING: name = "WRITE_EXISTING"; break;
  case FileData::Op::APPEND: name = "APPEND"; break;
  }

  if (name != nullptr)
    stream << name;
  return stream;
}
// Opens |fileName| with the fopen mode that corresponds to |op|.
// Throws Reader::OpenException for Op::READ and Writer::OpenException for any other operation
// if the file can not be opened.
FileData::FileData(string const & fileName, Op op) : m_FileName(fileName), m_Op(op)
{
// The table is indexed by the numeric value of |op|.
// NOTE(review): assumes the enumerators are declared in the order READ, WRITE_TRUNCATE,
// WRITE_EXISTING, APPEND with values 0..3 - confirm against the FileData::Op declaration.
char const * const modes[] = {"rb", "wb", "r+b", "ab"};
m_File = fopen(fileName.c_str(), modes[static_cast<int>(op)]);
if (m_File)
{
#if defined(_MSC_VER)
// Move file pointer to the end of the file to make it consistent with other platforms
if (op == Op::APPEND)
fseek64(m_File, 0, SEEK_END);
#endif
return;
}
if (op == Op::WRITE_EXISTING)
{
// Special case, since "r+b" fails if file doesn't exist.
m_File = fopen(fileName.c_str(), "wb");
if (m_File)
return;
}
// If we are here, the file could not be opened.
if (m_Op != Op::READ)
MYTHROW(Writer::OpenException, (GetErrorProlog()));
else
MYTHROW(Reader::OpenException, (GetErrorProlog()));
}
/// Closes the file if it is open; a failure to close is only logged.
FileData::~FileData()
{
  if (!m_File)
    return;

  if (fclose(m_File))
    LOG(LWARNING, ("Error closing file", GetErrorProlog()));
}
// Builds the common prefix of error messages: "<file name>; <operation>; <strerror(errno)>".
// The current value of errno is used, so the result is meaningful only right after a failed call.
string FileData::GetErrorProlog() const
{
std::ostringstream stream;
stream << m_FileName << "; " << m_Op << "; " << strerror(errno);
return stream.str();
}
static int64_t constexpr INVALID_POS = -1;
// Returns the size of the file in bytes.
// The size is obtained by seeking to the end of the file; the original position is restored afterwards.
// Throws Reader::SizeException if any of the tell/seek calls fails.
uint64_t FileData::Size() const
{
// Remember the current position to restore it at the end.
int64_t const pos = ftell64(m_File);
if (pos == INVALID_POS)
MYTHROW(Reader::SizeException, (GetErrorProlog(), pos));
if (fseek64(m_File, 0, SEEK_END))
MYTHROW(Reader::SizeException, (GetErrorProlog()));
int64_t const size = ftell64(m_File);
if (size == INVALID_POS)
MYTHROW(Reader::SizeException, (GetErrorProlog(), size));
if (fseek64(m_File, static_cast<off_t>(pos), SEEK_SET))
MYTHROW(Reader::SizeException, (GetErrorProlog(), pos));
ASSERT_GREATER_OR_EQUAL(size, 0, ());
return static_cast<uint64_t>(size);
}
// Reads exactly |size| bytes starting at the absolute offset |pos| into |p|.
// Throws Reader::ReadException if the seek fails or fewer than |size| bytes are read.
// NOTE(review): |pos| is narrowed to off_t before seeking - confirm that off_t is 64-bit
// on every supported platform.
void FileData::Read(uint64_t pos, void * p, size_t size)
{
if (fseek64(m_File, static_cast<off_t>(pos), SEEK_SET))
MYTHROW(Reader::ReadException, (GetErrorProlog(), pos));
size_t const bytesRead = fread(p, 1, size, m_File);
if (bytesRead != size || ferror(m_File))
MYTHROW(Reader::ReadException, (GetErrorProlog(), bytesRead, pos, size));
}
// Returns the current position in the file.
// Throws Writer::PosException if the position can not be obtained.
uint64_t FileData::Pos() const
{
int64_t const pos = ftell64(m_File);
if (pos == INVALID_POS)
MYTHROW(Writer::PosException, (GetErrorProlog(), pos));
ASSERT_GREATER_OR_EQUAL(pos, 0, ());
return static_cast<uint64_t>(pos);
}
// Moves the file position to the absolute offset |pos|.
// Must not be called for files opened with Op::APPEND (debug-only assert).
// Throws Writer::SeekException if the seek fails.
void FileData::Seek(uint64_t pos)
{
ASSERT_NOT_EQUAL(m_Op, Op::APPEND, (m_FileName, m_Op, pos));
if (fseek64(m_File, static_cast<off_t>(pos), SEEK_SET))
MYTHROW(Writer::SeekException, (GetErrorProlog(), pos));
}
// Writes |size| bytes from |p| at the current file position.
// Throws Writer::WriteException if fewer than |size| bytes are written.
void FileData::Write(void const * p, size_t size)
{
size_t const bytesWritten = fwrite(p, 1, size, m_File);
if (bytesWritten != size || ferror(m_File))
MYTHROW(Writer::WriteException, (GetErrorProlog(), bytesWritten, size));
}
// Flushes the stdio buffer of the file.
// Throws Writer::WriteException if fflush fails.
void FileData::Flush()
{
if (fflush(m_File))
MYTHROW(Writer::WriteException, (GetErrorProlog()));
}
/// Changes the size of the file to |sz| bytes.
/// Throws Writer::WriteException if the underlying call fails.
void FileData::Truncate(uint64_t sz)
{
#ifdef OMIM_OS_WINDOWS
  // _chsize() takes the size as a 32-bit long, so sizes of 2 GiB and more were silently
  // mangled by the implicit conversion. _chsize_s() accepts a 64-bit size and
  // returns zero on success.
  int const res = _chsize_s(fileno(m_File), static_cast<__int64>(sz));
#else
  int const res = ftruncate(fileno(m_File), static_cast<off_t>(sz));
#endif

  if (res)
    MYTHROW(Writer::WriteException, (GetErrorProlog(), sz));
}
/// Stores the size of the file |fName| in |sz|.
/// @return false if the file can not be opened for reading or its size can not be obtained;
/// |sz| is left untouched in that case.
bool GetFileSize(string const & fName, uint64_t & sz)
{
  try
  {
    FileData file(fName, FileData::Op::READ);
    sz = file.Size();
  }
  catch (RootException const &)
  {
    // Suppress all exceptions here.
    return false;
  }
  return true;
}
namespace
{
// Converts the result |res| of a C file operation (zero means success) to bool.
// On failure logs a warning with strerror(errno) and, if |fName| can still be opened and sized,
// an additional error.
bool CheckFileOperationResult(int res, string const & fName)
{
if (!res)
return true;
LOG(LWARNING, ("File operation error for file:", fName, "-", strerror(errno)));
// Additional check: if the size can still be read, the file is still there.
uint64_t dummy;
if (GetFileSize(fName, dummy))
LOG(LERROR, ("File exists but can't be deleted. Sharing violation?", fName));
return false;
}
// Returns true if peeking the next character of |fs| yields the end-of-file value.
bool IsEOF(ifstream & fs)
{
return fs.peek() == ifstream::traits_type::eof();
}
} // namespace
/// Removes the file |fName|.
/// @return true on success; failures are logged by CheckFileOperationResult.
bool DeleteFileX(string const & fName)
{
  int const removeResult = remove(fName.c_str());
  bool const removed = CheckFileOperationResult(removeResult, fName);
  return removed;
}
/// Renames |fOld| to |fNew|.
/// @return true on success; failures are logged (against |fOld|) by CheckFileOperationResult.
bool RenameFileX(string const & fOld, string const & fNew)
{
  int const renameResult = rename(fOld.c_str(), fNew.c_str());
  bool const renamed = CheckFileOperationResult(renameResult, fOld);
  return renamed;
}
/// Moves |fOld| to |fNew|: a plain rename is attempted first, with a copy + delete fallback.
/// @return true if the rename or the copy succeeded. The result of deleting the
/// source after a successful copy is ignored.
bool MoveFileX(string const & fOld, string const & fNew)
{
  // Fast path: a rename is enough.
  if (rename(fOld.c_str(), fNew.c_str()) == 0)
    return true;

  // Slow path: perform the full move by copying.
  bool const copied = CopyFileX(fOld, fNew);

  // After a successful copy the source is removed; after a failed one the
  // (possibly partial) destination is removed instead.
  (void)DeleteFileX(copied ? fOld : fNew);
  return copied;
}
/// Lets |write| produce a temporary file and then renames it to |dest|.
/// The temporary name is |tmp| or, when |tmp| is empty, |dest| + ".tmp" + the current thread id.
/// @return true if both the write and the rename succeeded; otherwise the
/// temporary file is deleted and false is returned.
bool WriteToTempAndRenameToFile(string const & dest, function<bool(string const &)> const & write, string const & tmp)
{
  string tmpFileName = tmp;
  if (tmpFileName.empty())
    tmpFileName = dest + ".tmp" + strings::to_string(this_thread::get_id());

  bool const written = write(tmpFileName);
  if (written && RenameFileX(tmpFileName, dest))
    return true;

  if (!written)
  {
    LOG(LERROR, ("Can't write to", tmpFileName));
  }
  else
  {
    LOG(LERROR, ("Can't rename file", tmpFileName, "to", dest));
  }

  DeleteFileX(tmpFileName);
  return false;
}
/// Appends the whole content of |fromFilename| to the end of |toFilename| (binary mode).
/// Stream exceptions are enabled: failbit/badbit on the source, badbit on the destination.
void AppendFileToFile(string const & fromFilename, string const & toFilename)
{
  ifstream from;
  from.exceptions(fstream::failbit | fstream::badbit);
  from.open(fromFilename, ios::binary);

  ofstream to;
  to.exceptions(fstream::badbit);
  to.open(toFilename, ios::binary | ios::app);

  // Nothing is streamed for an empty source file.
  if (IsEOF(from))
    return;

  to << from.rdbuf();
}
/// Copies the content of |fOld| into |fNew|, byte for byte.
/// @return true on success. On failure the error is logged, |fNew| is removed
/// (best effort) and false is returned; stream exceptions do not escape.
bool CopyFileX(string const & fOld, string const & fNew)
{
  ifstream ifs;
  ofstream ofs;
  ifs.exceptions(ifstream::failbit | ifstream::badbit);
  ofs.exceptions(ofstream::failbit | ofstream::badbit);

  try
  {
    // Open both streams in binary mode (as AppendFileToFile does): text mode performs
    // newline translation on some platforms and would corrupt binary files.
    ifs.open(fOld.c_str(), ios::binary);
    ofs.open(fNew.c_str(), ios::binary);

    // If source file is empty - make empty dest file without any errors.
    // (Streaming an empty rdbuf would set failbit and throw.)
    if (IsEOF(ifs))
      return true;

    ofs << ifs.rdbuf();
    ofs.flush();
    return true;
  }
  catch (system_error const &)
  {
    LOG(LWARNING, ("Failed to copy file from", fOld, "to", fNew, ":", strerror(errno)));
  }
  catch (exception const &)
  {
    LOG(LERROR, ("Unknown error when coping files:", fOld, "to", fNew, strerror(errno)));
  }

  // Don't care about possible error here ..
  (void)DeleteFileX(fNew);
  return false;
}
/// Compares two files byte by byte.
/// @return true if both files have the same size and the same content.
/// Exceptions thrown by FileData (open/read failures) propagate to the caller.
bool IsEqualFiles(string const & firstFile, string const & secondFile)
{
  FileData first(firstFile, FileData::Op::READ);
  FileData second(secondFile, FileData::Op::READ);

  // Keep sizes and offsets 64-bit: narrowing them to size_t would wrap on
  // 32-bit targets for files larger than 4 GiB.
  uint64_t const fileSize = first.Size();
  if (fileSize != second.Size())
    return false;

  size_t constexpr bufSize = READ_FILE_BUFFER_SIZE;
  vector<char> buf1(bufSize);
  vector<char> buf2(bufSize);

  uint64_t currSize = 0;
  while (currSize < fileSize)
  {
    size_t const toRead = static_cast<size_t>(std::min<uint64_t>(bufSize, fileSize - currSize));
    first.Read(currSize, buf1.data(), toRead);
    second.Read(currSize, buf2.data(), toRead);

    // Compare only the bytes that were actually read in this iteration.
    if (!std::equal(buf1.data(), buf1.data() + toRead, buf2.data()))
      return false;

    currSize += toRead;
  }

  return true;
}
/// Reads the whole file |filePath| into memory.
/// @return A buffer holding the file's bytes. FileData exceptions propagate to the caller.
std::vector<uint8_t> ReadFile(std::string const & filePath)
{
  FileData file(filePath, FileData::Op::READ);

  uint64_t const sz = file.Size();
  std::vector<uint8_t> contents;
  contents.resize(sz);

  file.Read(0 /* pos */, contents.data(), contents.size());
  return contents;
}
} // namespace base

View file

@ -0,0 +1,69 @@
#pragma once
#include "base/macros.hpp"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
namespace base
{
/// Wrapper around a stdio FILE handle that reports failures through
/// Reader/Writer exceptions instead of return codes.
class FileData
{
public:
/// Mode in which the file is opened.
/// @note Do not change order (@see FileData::FileData).
enum class Op
{
READ = 0,
WRITE_TRUNCATE,
WRITE_EXISTING,
APPEND
};
FileData(std::string const & fileName, Op op);
~FileData();
uint64_t Size() const;
/// @return Current stream position. Throws Writer::PosException on failure.
uint64_t Pos() const;
/// Moves the stream position to |pos|. Asserts that the file was not opened with Op::APPEND.
/// Throws Writer::SeekException on failure.
void Seek(uint64_t pos);
/// Seeks to |pos| and reads exactly |size| bytes into |p|.
/// Throws Reader::ReadException on a seek failure or a short read.
void Read(uint64_t pos, void * p, size_t size);
/// Writes |size| bytes from |p| at the current position. Throws Writer::WriteException on failure.
void Write(void const * p, size_t size);
/// Flushes buffered data. Throws Writer::WriteException on failure.
void Flush();
/// Resizes the file to |sz| bytes. Throws Writer::WriteException on failure.
void Truncate(uint64_t sz);
std::string const & GetName() const { return m_FileName; }
private:
FILE * m_File;
std::string const m_FileName;
Op const m_Op;
// Builds the common prefix passed to the exceptions thrown by this class.
std::string GetErrorProlog() const;
DISALLOW_COPY(FileData);
};
bool GetFileSize(std::string const & fName, uint64_t & sz);
bool DeleteFileX(std::string const & fName);
bool RenameFileX(std::string const & fOld, std::string const & fNew);
/// Write to temp file and rename it to dest. Delete temp on failure.
/// @param write function that writes to file with a given name, returns true on success.
bool WriteToTempAndRenameToFile(std::string const & dest, std::function<bool(std::string const &)> const & write,
std::string const & tmp = "");
void AppendFileToFile(std::string const & fromFilename, std::string const & toFilename);
/// @return false if copy fails. DOES NOT THROW exceptions
bool CopyFileX(std::string const & fOld, std::string const & fNew);
/// @return false if moving fails. DOES NOT THROW exceptions
bool MoveFileX(std::string const & fOld, std::string const & fNew);
bool IsEqualFiles(std::string const & firstFile, std::string const & secondFile);
std::vector<uint8_t> ReadFile(std::string const & filePath);
} // namespace base

View file

@ -0,0 +1,162 @@
#pragma once
#include "base/assert.hpp"
#include "base/logging.hpp"
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#endif
#include <memory>
#include <sstream>
#include <string>
#ifndef XML_STATIC
#define XML_STATIC
#endif
#include <expat.h>
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
/// Dispatcher's methods Push, Pop and AddAttr can conveniently take different parameters:
/// 1. char const * (no overhead: this is the form in which Expat passes the strings)
/// 2. std::string or std::string const & (temporary std::string is created from char const *)
/// 3. std::string_view (created from char const *)
///
/// CharData accepts std::string const & or std::string & to modify the data before consumption.
template <typename DispatcherT>
class XmlParser
{
public:
explicit XmlParser(DispatcherT & dispatcher, bool enableCharHandler = false)
: m_depth(0)
, m_restrictDepth(static_cast<size_t>(-1))
, m_dispatcher(dispatcher)
, m_enableCharHandler(enableCharHandler)
, m_parser(std::unique_ptr<XML_ParserStruct, decltype(&XML_ParserFree)>(XML_ParserCreate(nullptr /* encoding */),
&XML_ParserFree))
{
CHECK(m_parser, ());
OnPostCreate();
}
static void StartElementHandler(void * userData, XML_Char const * name, XML_Char const ** attrs)
{
CHECK(userData, (name));
auto * xmlParser = static_cast<XmlParser *>(userData);
xmlParser->OnStartElement(name, attrs);
}
static void EndElementHandler(void * userData, XML_Char const * name)
{
CHECK(userData, (name));
auto * xmlParser = static_cast<XmlParser *>(userData);
xmlParser->OnEndElement(name);
}
static void CharacterDataHandler(void * userData, XML_Char const * data, int length)
{
CHECK(userData, (data));
auto * xmlParser = static_cast<XmlParser *>(userData);
xmlParser->OnCharacterData(data, length);
}
void * GetBuffer(int len)
{
CHECK(m_parser, ());
return XML_GetBuffer(m_parser.get(), len);
}
XML_Status ParseBuffer(int len, int isFinal)
{
CHECK(m_parser, ());
return XML_ParseBuffer(m_parser.get(), len, isFinal);
}
void OnPostCreate()
{
CHECK(m_parser, ());
// Enable all the event routines we want
XML_SetStartElementHandler(m_parser.get(), StartElementHandler);
XML_SetEndElementHandler(m_parser.get(), EndElementHandler);
if (m_enableCharHandler)
XML_SetCharacterDataHandler(m_parser.get(), CharacterDataHandler);
XML_SetUserData(m_parser.get(), static_cast<void *>(this));
}
using StringPtrT = XML_Char const *;
// Start element handler
void OnStartElement(StringPtrT name, StringPtrT * attrs)
{
CheckCharData();
++m_depth;
if (m_depth >= m_restrictDepth)
return;
if (!m_dispatcher.Push(name))
{
m_restrictDepth = m_depth;
return;
}
for (size_t i = 0; attrs[2 * i]; ++i)
m_dispatcher.AddAttr(attrs[2 * i], attrs[2 * i + 1]);
}
// End element handler
void OnEndElement(StringPtrT name)
{
CheckCharData();
--m_depth;
if (m_depth >= m_restrictDepth)
return;
if (m_restrictDepth != size_t(-1))
m_restrictDepth = static_cast<size_t>(-1);
else
m_dispatcher.Pop(name);
}
void OnCharacterData(XML_Char const * data, int length)
{
// Accumulate character data - it can be passed by parts
// (when reading from fixed length buffer).
m_charData.append(data, length);
}
std::string GetErrorMessage()
{
if (XML_GetErrorCode(m_parser.get()) == XML_ERROR_NONE)
return {};
std::stringstream s;
s << "XML parse error at line " << XML_GetCurrentLineNumber(m_parser.get()) << " and byte "
<< XML_GetCurrentByteIndex(m_parser.get());
return s.str();
}
private:
size_t m_depth;
size_t m_restrictDepth;
DispatcherT & m_dispatcher;
std::string m_charData;
bool m_enableCharHandler;
std::unique_ptr<XML_ParserStruct, decltype(&XML_ParserFree)> m_parser;
void CheckCharData()
{
if (m_enableCharHandler && !m_charData.empty())
{
m_dispatcher.CharData(m_charData);
m_charData.clear();
}
}
};

View file

@ -0,0 +1,312 @@
#pragma once
#include "coding/files_container.hpp"
#include "coding/memory_region.hpp"
#include "coding/reader.hpp"
#include "coding/succinct_mapper.hpp"
#include "coding/write_to_sink.hpp"
#include "coding/writer.hpp"
#include "base/assert.hpp"
#include "base/checked_cast.hpp"
#include "base/logging.hpp"
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-private-field"
#endif
#include "3party/succinct/elias_fano.hpp"
#include "3party/succinct/rs_bit_vector.hpp"
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
#include <algorithm>
#include <cstdint>
#include <functional>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <vector>
// A data structure that allows storing a map from small 32-bit integers (the main use
// case is feature ids of a single mwm) to arbitrary values and accessing this map
// with a small RAM footprint.
//
// Format:
// File offset (bytes) Field name Field size (bytes)
// 0 version 2
// 2 block size 2
// 4 positions offset 4
// 8 variables offset 4
// 12 end of section 4
// 16 identifiers table positions offset - 16
// positions offset positions table variables offset - positions offset
// variables offset variables blocks end of section - variables offset
//
// Identifiers table is a bit-vector with rank-select table, where set
// bits denote that values for the corresponding features are in the
// table. Identifiers table is stored in the native endianness.
//
// Positions table is an Elias-Fano table where each entry corresponds
// to the start position of the variables block.
//
// Variables is a sequence of blocks, where each block (with the
// exception of the last one) is a sequence of kBlockSize variables
// encoded by block encoding callback.
//
// On Get call m_blockSize consecutive variables are decoded and cached in RAM.
/// Read-side accessor for a serialized map from uint32 ids to |Value|.
/// Values are stored in blocks of Header::m_blockSize entries; blocks are decoded by the
/// user-supplied ReadBlockCallback.
template <typename Value>
class MapUint32ToValue
{
  // 0 - initial version.
  // 1 - added m_blockSize instead of m_endianess.
  static uint16_t constexpr kLastVersion = 1;

public:
  /// Decodes values from the source into the vector; the uint32_t argument is the
  /// maximum number of values to read (see GetImpl).
  using ReadBlockCallback = std::function<void(NonOwningReaderSource &, uint32_t, std::vector<Value> &)>;

  struct Header
  {
    /// Reads the header fields from the beginning of |reader|.
    /// @return The version stored in the file. For version 0 the block size is forced to 64.
    uint16_t Read(Reader & reader)
    {
      NonOwningReaderSource source(reader);
      auto const version = ReadPrimitiveFromSource<uint16_t>(source);
      m_blockSize = ReadPrimitiveFromSource<uint16_t>(source);
      if (version == 0)
        m_blockSize = 64;
      m_positionsOffset = ReadPrimitiveFromSource<uint32_t>(source);
      m_variablesOffset = ReadPrimitiveFromSource<uint32_t>(source);
      m_endOffset = ReadPrimitiveFromSource<uint32_t>(source);
      return version;
    }

    /// Writes the header (always with kLastVersion) to |writer|.
    void Write(Writer & writer)
    {
      WriteToSink(writer, kLastVersion);
      WriteToSink(writer, m_blockSize);
      WriteToSink(writer, m_positionsOffset);
      WriteToSink(writer, m_variablesOffset);
      WriteToSink(writer, m_endOffset);
    }

    uint16_t m_blockSize = 0;
    uint32_t m_positionsOffset = 0;
    uint32_t m_variablesOffset = 0;
    uint32_t m_endOffset = 0;
  };

  MapUint32ToValue(Reader & reader, ReadBlockCallback const & readBlockCallback)
    : m_reader(reader)
    , m_readBlockCallback(readBlockCallback)
  {}

  /// @name Tries to get |value| for key identified by |id|.
  /// @returns false if table does not have entry for this id.
  /// @{
  /// Decodes the whole block containing |id| and keeps it in the in-memory cache.
  [[nodiscard]] bool Get(uint32_t id, Value & value)
  {
    if (id >= m_ids.size() || !m_ids[id])
      return false;

    uint32_t const rank = static_cast<uint32_t>(m_ids.rank(id));
    uint32_t const base = rank / m_header.m_blockSize;
    uint32_t const offset = rank % m_header.m_blockSize;

    auto & entry = m_cache[base];
    if (entry.empty())
      entry = GetImpl(rank, m_header.m_blockSize);

    value = entry[offset];
    return true;
  }

  /// Does not touch the cache: decodes the block only up to the requested element.
  [[nodiscard]] bool GetThreadsafe(uint32_t id, Value & value) const
  {
    if (id >= m_ids.size() || !m_ids[id])
      return false;

    uint32_t const rank = static_cast<uint32_t>(m_ids.rank(id));
    uint32_t const offset = rank % m_header.m_blockSize;

    auto const entry = GetImpl(rank, offset + 1);

    value = entry[offset];
    return true;
  }
  /// @}

  // Loads MapUint32ToValue instance. Note that |reader| must be alive
  // until the destruction of loaded table. Returns nullptr if
  // MapUint32ToValue can't be loaded.
  // It's guaranteed that |readBlockCallback| will not be called for empty block.
  static std::unique_ptr<MapUint32ToValue> Load(Reader & reader, ReadBlockCallback const & readBlockCallback)
  {
    auto table = std::make_unique<MapUint32ToValue>(reader, readBlockCallback);
    if (!table->Init())
      return {};
    return table;
  }

  /// Calls |fn|(id, value) for every stored id, in increasing id order.
  template <typename Fn>
  void ForEach(Fn && fn)
  {
    for (uint64_t i = 0; i < m_ids.num_ones(); ++i)
    {
      auto const j = static_cast<uint32_t>(m_ids.select(i));
      Value value;
      CHECK(Get(j, value), (i, j));
      fn(j, value);
    }
  }

  /// @return Number of ids stored in the map.
  uint64_t Count() const { return m_ids.num_ones(); }

private:
  /// Decodes (a prefix of) the block that contains the element with the given |rank|.
  /// @param[in] upperSize Read until this size. Can be one of: \n
  /// - m_header.m_blockSize for the regular Get version with cache \n
  /// - index + 1 for the GetThreadsafe version without cache, to stop once the needed element is read \n
  std::vector<Value> GetImpl(uint32_t rank, uint32_t upperSize) const
  {
    uint32_t const base = rank / m_header.m_blockSize;
    auto const start = m_offsets.select(base);
    auto const end = base + 1 < m_offsets.num_ones() ? m_offsets.select(base + 1) + m_header.m_variablesOffset
                                                     : m_header.m_endOffset;

    NonOwningReaderSource src(m_reader, m_header.m_variablesOffset + start, end);

    // Important! Client should read while src.Size() > 0 and max |upperSize| number of elements.
    std::vector<Value> values;
    m_readBlockCallback(src, upperSize, values);
    return values;
  }

  /// Reads and validates the header, then maps the identifiers and positions tables.
  /// @return false if the version is unsupported or the header is inconsistent.
  bool Init()
  {
    auto const version = m_header.Read(m_reader);
    if (version > kLastVersion)
    {
      LOG(LERROR, ("Unsupported version =", version, "Last known version =", kLastVersion));
      return false;
    }

    // A zero block size would lead to division by zero in Get/GetThreadsafe/GetImpl.
    if (m_header.m_blockSize == 0)
    {
      LOG(LERROR, ("Invalid block size in the header."));
      return false;
    }

    // The sections must follow each other; otherwise the unsigned size computations
    // below would wrap around and request huge allocations.
    if (m_header.m_positionsOffset < sizeof(m_header) || m_header.m_variablesOffset < m_header.m_positionsOffset ||
        m_header.m_endOffset < m_header.m_variablesOffset)
    {
      LOG(LERROR, ("Inconsistent offsets in the header:", m_header.m_positionsOffset, m_header.m_variablesOffset,
                   m_header.m_endOffset));
      return false;
    }

    {
      uint32_t const idsSize = m_header.m_positionsOffset - sizeof(m_header);
      std::vector<uint8_t> data(idsSize);
      m_reader.Read(sizeof(m_header), data.data(), data.size());
      m_idsRegion = std::make_unique<CopiedMemoryRegion>(std::move(data));

      coding::MapVisitor visitor(m_idsRegion->ImmutableData());
      m_ids.map(visitor);
    }

    {
      uint32_t const offsetsSize = m_header.m_variablesOffset - m_header.m_positionsOffset;
      std::vector<uint8_t> data(offsetsSize);
      m_reader.Read(m_header.m_positionsOffset, data.data(), data.size());
      m_offsetsRegion = std::make_unique<CopiedMemoryRegion>(std::move(data));

      coding::MapVisitor visitor(m_offsetsRegion->ImmutableData());
      m_offsets.map(visitor);
    }

    return true;
  }

  Header m_header;
  Reader & m_reader;

  // Backing storage for m_ids and m_offsets, which are mapped over these buffers.
  std::unique_ptr<CopiedMemoryRegion> m_idsRegion;
  std::unique_ptr<CopiedMemoryRegion> m_offsetsRegion;

  succinct::rs_bit_vector m_ids;
  succinct::elias_fano m_offsets;

  ReadBlockCallback m_readBlockCallback;

  // Decoded blocks keyed by block index; filled by Get.
  std::unordered_map<uint32_t, std::vector<Value>> m_cache;
};
template <typename Value>
class MapUint32ToValueBuilder
{
public:
using Iter = typename std::vector<Value>::const_iterator;
using WriteBlockCallback = std::function<void(Writer &, Iter, Iter)>;
using Map = MapUint32ToValue<Value>;
void Put(uint32_t id, Value value)
{
if (!m_ids.empty())
CHECK_LESS(m_ids.back(), id, ());
m_values.push_back(value);
m_ids.push_back(id);
}
// It's guaranteed that |writeBlockCallback| will not be called for empty block.
template <class WriterT>
void Freeze(WriterT & writer, WriteBlockCallback const & writeBlockCallback, uint16_t blockSize = 64) const
{
typename Map::Header header;
header.m_blockSize = blockSize;
auto const startOffset = writer.Pos();
header.Write(writer);
{
uint64_t const numBits = m_ids.empty() ? 0 : m_ids.back() + 1;
succinct::bit_vector_builder builder(numBits);
for (auto const & id : m_ids)
builder.set(id, true);
coding::FreezeVisitor<WriterT> visitor(writer);
succinct::rs_bit_vector(&builder).map(visitor);
}
std::vector<uint32_t> offsets;
std::vector<uint8_t> variables;
{
MemWriter<std::vector<uint8_t>> writer(variables);
for (size_t i = 0; i < m_values.size(); i += blockSize)
{
offsets.push_back(static_cast<uint32_t>(variables.size()));
auto const endOffset = std::min(i + blockSize, m_values.size());
CHECK_GREATER(endOffset, i, ());
writeBlockCallback(writer, m_values.cbegin() + i, m_values.cbegin() + endOffset);
}
}
{
succinct::elias_fano::elias_fano_builder builder(offsets.empty() ? 0 : offsets.back() + 1, offsets.size());
for (auto const & offset : offsets)
builder.push_back(offset);
header.m_positionsOffset = base::checked_cast<uint32_t>(writer.Pos() - startOffset);
coding::FreezeVisitor<WriterT> visitor(writer);
succinct::elias_fano(&builder).map(visitor);
}
{
header.m_variablesOffset = base::checked_cast<uint32_t>(writer.Pos() - startOffset);
writer.Write(variables.data(), variables.size());
header.m_endOffset = base::checked_cast<uint32_t>(writer.Pos() - startOffset);
}
auto const endOffset = writer.Pos();
writer.Seek(startOffset);
header.Write(writer);
writer.Seek(endOffset);
}
private:
std::vector<Value> m_values;
std::vector<uint32_t> m_ids;
};

View file

@ -0,0 +1,50 @@
#pragma once
#include "coding/files_container.hpp"
#include "base/macros.hpp"
#include <cstdint>
#include <utility>
#include <vector>
/// Interface of a read-only contiguous block of bytes.
class MemoryRegion
{
public:
  virtual ~MemoryRegion() = default;

  /// @return Size of the region, as reported by the implementation.
  virtual uint64_t Size() const = 0;

  /// @return Pointer to the first byte of the region; the data must not be modified through it.
  virtual uint8_t const * ImmutableData() const = 0;
};
/// MemoryRegion backed by a FilesMappingContainer::Handle, which it takes ownership of.
class MappedMemoryRegion : public MemoryRegion
{
public:
  explicit MappedMemoryRegion(FilesMappingContainer::Handle && handle)
    : m_handle(std::move(handle))
  {}

  // MemoryRegion overrides:
  uint64_t Size() const override
  {
    return m_handle.GetSize();
  }

  uint8_t const * ImmutableData() const override
  {
    return m_handle.GetData<uint8_t>();
  }

private:
  FilesMappingContainer::Handle m_handle;

  DISALLOW_COPY(MappedMemoryRegion);
};
/// MemoryRegion that owns its bytes in a std::vector moved in at construction.
class CopiedMemoryRegion : public MemoryRegion
{
public:
  explicit CopiedMemoryRegion(std::vector<uint8_t> && buffer)
    : m_buffer(std::move(buffer))
  {}

  // MemoryRegion overrides:
  uint64_t Size() const override
  {
    return m_buffer.size();
  }

  uint8_t const * ImmutableData() const override
  {
    return m_buffer.data();
  }

  /// Same storage as ImmutableData(), but writable.
  uint8_t * MutableData()
  {
    return m_buffer.data();
  }

private:
  std::vector<uint8_t> m_buffer;

  DISALLOW_COPY(CopiedMemoryRegion);
};

146
libs/coding/mmap_reader.cpp Normal file
View file

@ -0,0 +1,146 @@
#include "coding/mmap_reader.hpp"
#include "base/scope_guard.hpp"
#include "std/target_os.hpp"
#include <cstring>
#ifdef OMIM_OS_WINDOWS
#include "std/windows.hpp"
#else
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
/// Owns a read-only memory mapping of a whole file together with the OS handles behind it.
/// The constructor throws OpenException if the file can't be opened or mapped; every handle
/// acquired up to that point is released before throwing.
class MmapReader::MmapData
{
public:
  explicit MmapData(std::string const & fileName, Advice advice)
  {
#ifdef OMIM_OS_WINDOWS
    m_hFile = CreateFileA(fileName.c_str(), GENERIC_READ, 0, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
    if (m_hFile == INVALID_HANDLE_VALUE)
      MYTHROW(Reader::OpenException, ("Can't open file:", fileName, "win last error:", GetLastError()));

    // The guards close the handles if any of the following steps throws.
    SCOPE_GUARD(fileGuard, [this] { CloseHandle(m_hFile); });

    m_hMapping = CreateFileMappingA(m_hFile, nullptr, PAGE_READONLY, 0, 0, nullptr);
    if (!m_hMapping)
      MYTHROW(Reader::OpenException,
              ("Can't create file's Windows mapping:", fileName, "win last error:", GetLastError()));

    SCOPE_GUARD(mappingGuard, [this] { CloseHandle(m_hMapping); });

    LARGE_INTEGER fileSize;
    if (!GetFileSizeEx(m_hFile, &fileSize))
      MYTHROW(Reader::OpenException, ("Can't get file size:", fileName, "win last error:", GetLastError()));

    m_size = fileSize.QuadPart;
    m_memory = static_cast<uint8_t *>(MapViewOfFile(m_hMapping, FILE_MAP_READ, 0, 0, 0));
    if (!m_memory)
      MYTHROW(Reader::OpenException,
              ("Can't create file's Windows mapping:", fileName, "win last error:", GetLastError()));

    // Success: from now on the handles are released by the destructor.
    mappingGuard.release();
    fileGuard.release();
#else
    m_fd = open(fileName.c_str(), O_RDONLY | O_NONBLOCK);
    if (m_fd == -1)
      MYTHROW(OpenException, ("open failed for file", fileName));

    struct stat s;
    if (-1 == fstat(m_fd, &s))
    {
      // The destructor does not run when the constructor throws, so close the
      // descriptor here to avoid leaking it.
      close(m_fd);
      MYTHROW(OpenException, ("fstat failed for file", fileName));
    }

    m_size = s.st_size;
    m_memory = static_cast<uint8_t *>(mmap(0, static_cast<size_t>(m_size), PROT_READ, MAP_PRIVATE, m_fd, 0));
    if (m_memory == MAP_FAILED)
    {
      close(m_fd);
      MYTHROW(OpenException, ("mmap failed for file", fileName));
    }

    int adv = MADV_NORMAL;
    switch (advice)
    {
    case Advice::Random: adv = MADV_RANDOM; break;
    case Advice::Sequential: adv = MADV_SEQUENTIAL; break;
    case Advice::Normal: adv = MADV_NORMAL; break;
    }

    // A failed madvise is not fatal: the mapping stays usable, so only log it.
    if (madvise(m_memory, static_cast<size_t>(s.st_size), adv) != 0)
      LOG(LWARNING, ("madvise error:", strerror(errno)));
#endif
  }

  // The object owns OS handles: copying it would release them twice.
  MmapData(MmapData const &) = delete;
  MmapData & operator=(MmapData const &) = delete;

  ~MmapData()
  {
#ifdef OMIM_OS_WINDOWS
    UnmapViewOfFile(m_memory);
    CloseHandle(m_hMapping);
    CloseHandle(m_hFile);
#else
    munmap(m_memory, static_cast<size_t>(m_size));
    close(m_fd);
#endif
  }

  // Start of the mapping and the size of the mapped file in bytes.
  uint8_t * m_memory = nullptr;
  uint64_t m_size = 0;

private:
#ifdef OMIM_OS_WINDOWS
  HANDLE m_hFile;
  HANDLE m_hMapping;
#else
  int m_fd = 0;
#endif
};
/// Maps the whole file |fileName|; the reader initially covers the entire mapping.
MmapReader::MmapReader(std::string const & fileName, Advice advice)
  : base_type(fileName)
  , m_data(std::make_shared<MmapData>(fileName, advice))
  , m_offset(0)
  , m_size(m_data->m_size)
{
}
/// Creates a reader over the window [offset, offset + size) that shares the
/// mapping of |reader| instead of mapping the file again.
MmapReader::MmapReader(MmapReader const & reader, uint64_t offset, uint64_t size)
  : base_type(reader.GetName())
  , m_data(reader.m_data)
  , m_offset(offset)
  , m_size(size)
{
}
/// @return Size in bytes of the window this reader covers.
uint64_t MmapReader::Size() const
{
  uint64_t const windowSize = m_size;
  return windowSize;
}
/// Copies |size| bytes starting at |pos| (relative to this reader's window) into |p|.
/// The range is checked only by an assertion.
void MmapReader::Read(uint64_t pos, void * p, size_t size) const
{
  ASSERT_LESS_OR_EQUAL(pos + size, Size(), (pos, size));

  uint8_t const * const src = m_data->m_memory + m_offset + pos;
  memcpy(p, src, size);
}
/// Creates a reader over [pos, pos + size) of this reader's window; the new
/// reader shares the same mapping.
std::unique_ptr<Reader> MmapReader::CreateSubReader(uint64_t pos, uint64_t size) const
{
  ASSERT_LESS_OR_EQUAL(pos + size, Size(), (pos, size));

  // Can't use make_unique with private constructor.
  std::unique_ptr<Reader> subReader(new MmapReader(*this, m_offset + pos, size));
  return subReader;
}
/// @return Pointer to the beginning of the mapped memory. This reader's offset
/// is not applied to the returned pointer.
uint8_t * MmapReader::Data() const
{
  uint8_t * const mappingStart = m_data->m_memory;
  return mappingStart;
}
/// Replaces the window covered by this reader. The new window is checked
/// (assertion only) against the current size of the reader.
void MmapReader::SetOffsetAndSize(uint64_t offset, uint64_t size)
{
  ASSERT_LESS_OR_EQUAL(offset + size, Size(), (offset, size));

  m_size = size;
  m_offset = offset;
}

View file

@ -0,0 +1,43 @@
#pragma once
#include "coding/reader.hpp"
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
/// @TODO Add Windows support
/// ModelReader implementation that serves reads from a memory mapping of a file.
/// Sub-readers created from an instance share the same mapping.
class MmapReader : public ModelReader
{
public:
/// Access pattern hint for the mapping.
enum class Advice
{
Normal,
Random,
Sequential
};
/// Maps the whole file |fileName|.
explicit MmapReader(std::string const & fileName, Advice advice = Advice::Normal);
uint64_t Size() const override;
void Read(uint64_t pos, void * p, size_t size) const override;
std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const override;
/// Direct file/memory access
uint8_t * Data() const;
protected:
// Used in special derived readers.
void SetOffsetAndSize(uint64_t offset, uint64_t size);
private:
using base_type = ModelReader;
// Holds the mapping and the OS handles; defined in the .cpp file.
class MmapData;
// Constructs a reader over a window of |reader|'s mapping (used by CreateSubReader).
MmapReader(MmapReader const & reader, uint64_t offset, uint64_t size);
std::shared_ptr<MmapData> m_data;
// Window of the mapping covered by this reader.
uint64_t m_offset;
uint64_t m_size;
};

View file

@ -0,0 +1,28 @@
#include "coding/move_to_front.hpp"
#include "base/assert.hpp"
#include <algorithm>
#include <cstring>
#include <numeric>
namespace coding
{
/// Initializes the sequence with the identity order: 0, 1, ..., kNumBytes - 1.
MoveToFront::MoveToFront()
{
  for (size_t i = 0; i < kNumBytes; ++i)
    m_order[i] = static_cast<uint8_t>(i);
}

/// Finds |b| in the current sequence, moves it to the front and
/// returns the index it had before the move.
uint8_t MoveToFront::Transform(uint8_t b)
{
  auto const it = std::find(m_order.begin(), m_order.end(), b);
  ASSERT(it != m_order.end(), ());

  size_t const result = std::distance(m_order.begin(), it);
  ASSERT_LESS(result, kNumBytes, ());

  // Shift everything in front of |b| one position to the right and put |b| first.
  std::copy_backward(m_order.begin(), it, it + 1);
  m_order[0] = b;

  ASSERT_EQUAL(m_order[0], b, ());
  return static_cast<uint8_t>(result);
}
}  // namespace coding

View file

@ -0,0 +1,26 @@
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <limits>
namespace coding
{
/// Move-to-front transform over the alphabet of all byte values.
class MoveToFront
{
public:
  /// Number of distinct byte values, i.e. the length of the maintained sequence.
  static size_t constexpr kNumBytes = static_cast<size_t>(std::numeric_limits<uint8_t>::max()) + 1;

  MoveToFront();

  // Returns index of the byte |b| in the current sequence of bytes,
  // then moves |b| to the first position.
  uint8_t Transform(uint8_t b);

  /// @return The byte currently located at position |i| of the sequence.
  uint8_t operator[](uint8_t i) const
  {
    return m_order[i];
  }

private:
  std::array<uint8_t, kNumBytes> m_order;
};
}  // namespace coding

84
libs/coding/parse_xml.hpp Normal file
View file

@ -0,0 +1,84 @@
#pragma once
#include "coding/internal/xmlparser.hpp"
#include "base/assert.hpp"
#include "base/exception.hpp"
#include <algorithm>
#include <cstdint>
#include <exception>
DECLARE_EXCEPTION(XmlParseError, RootException);
/// Feeds an XML document to XmlParser in chunks of kBufferSize bytes read from |Sequence|.
template <typename Sequence, typename XMLDispatcher>
class XMLSequenceParser
{
public:
  XMLSequenceParser(Sequence & source, XMLDispatcher & dispatcher, bool useCharData = false)
    : m_source(source)
    , m_parser(dispatcher, useCharData)
  {}

  /// Reads and parses the next chunk.
  /// @return true if a full buffer was consumed, i.e. more data may follow.
  /// @throws XmlParseError if the parser reports an error for the chunk.
  // NOTE(review): ParseBuffer is always called with isFinal == false here - confirm
  // that the end of the document is intentionally never signalled to the parser.
  bool Read()
  {
    auto * buffer = static_cast<char *>(m_parser.GetBuffer(kBufferSize));
    ASSERT(buffer, ());

    m_numRead = m_source.Read(buffer, kBufferSize);
    if (m_numRead == 0)
      return false;

    auto const status = m_parser.ParseBuffer(static_cast<uint32_t>(m_numRead), false);
    if (status == XML_STATUS_ERROR)
    {
      MYTHROW(XmlParseError, (m_parser.GetErrorMessage()));
    }

    m_res += m_numRead;
    return m_numRead == kBufferSize;
  }

private:
  uint32_t static constexpr kBufferSize = 16 * 1024;

  // Total number of bytes parsed so far and the size of the last chunk read.
  uint64_t m_res = 0;
  uint64_t m_numRead = 0;

  Sequence & m_source;
  XmlParser<XMLDispatcher> m_parser;
};
/// Adapts a Source (with Size() and Read(p, size)) to the "read up to N bytes"
/// interface expected by XMLSequenceParser.
template <class Source>
class SequenceAdapter
{
public:
  SequenceAdapter(Source & source) : m_source(source) {}

  /// Reads at most |size| bytes into |p|, limited by what the source reports as its size.
  /// @return Number of bytes actually read.
  uint64_t Read(void * p, uint64_t size)
  {
    uint64_t const available = m_source.Size();
    auto const correctSize = static_cast<size_t>(std::min(size, available));

    m_source.Read(p, correctSize);
    return correctSize;
  }

private:
  Source & m_source;
};
/// Parses the XML document provided by |source|, reporting events to |dispatcher|.
/// @return false if any std::exception was thrown while reading or parsing (it is
/// logged as a warning), true otherwise.
template <typename XMLDispatcher, typename Source>
bool ParseXML(Source & source, XMLDispatcher & dispatcher, bool useCharData = false)
{
  SequenceAdapter<Source> adapter(source);
  XMLSequenceParser<decltype(adapter), XMLDispatcher> parser(adapter, dispatcher, useCharData);

  try
  {
    // Keep pulling chunks until the parser says there is nothing more to read.
    bool hasMore = true;
    while (hasMore)
      hasMore = parser.Read();
  }
  catch (std::exception const & e)
  {
    LOG(LWARNING, (e.what()));
    return false;
  }

  return true;
}

View file

@ -0,0 +1,146 @@
#include "coding/point_coding.hpp"
#include "geometry/mercator.hpp"
#include "base/assert.hpp"
#include "base/bits.hpp"
#include <algorithm>
namespace
{
/// @return The largest value representable with |coordBits| bits, i.e. 2^coordBits - 1, as a double.
double CoordSize(uint8_t coordBits)
{
  ASSERT(coordBits >= 1 && coordBits <= 32, (coordBits));

  uint64_t const valuesCount = uint64_t{1} << coordBits;
  return static_cast<double>(valuesCount - 1);
}
}  // namespace
/// Maps |x| from the range [min, max] to an integer in [0, 2^coordBits - 1], rounding to nearest.
/// Values outside of the range are clamped to its ends.
uint32_t DoubleToUint32(double x, double min, double max, uint8_t coordBits)
{
  ASSERT_LESS_OR_EQUAL(min, max, ());
  double const coordSize = CoordSize(coordBits);

  // The boundary cases are handled explicitly to avoid NANs when min == max.
  double const d = x <= min ? 0.0 : (x >= max ? coordSize : (x - min) / (max - min) * coordSize);

  // Check in case of NANs.
  ASSERT(d >= 0 && d <= coordSize, (d, x, min, max, coordBits));
  return static_cast<uint32_t>(0.5 + d);
}
/// Inverse of DoubleToUint32: maps the integer |x| back into [min, max].
/// The result is clamped to [min, max].
double Uint32ToDouble(uint32_t x, double min, double max, uint8_t coordBits)
{
  ASSERT_LESS_OR_EQUAL(min, max, ());

  double const coordSize = CoordSize(coordBits);
  double const d = min + static_cast<double>(x) * (max - min) / coordSize;

  // The range check of |x| is disabled for now because of the fancy serialization of m2::DiamondBox.
  /// @todo Check PathsThroughLayers search test. Refactor CitiesBoundariesSerDes.
  // ASSERT_LESS_OR_EQUAL(x, coordSize, (d, min, max, coordBits));

  // The range check of |d| is disabled because of possible floating point errors.
  // ASSERT(d >= min && d <= max, (d, x, min, max, coordBits));

  return math::Clamp(d, min, max);
}
/// Encodes the point (x, y) with |coordBits| bits per coordinate, relative to mercator::Bounds.
m2::PointU PointDToPointU(double x, double y, uint8_t coordBits)
{
  using mercator::Bounds;

  uint32_t const ux = DoubleToUint32(x, Bounds::kMinX, Bounds::kMaxX, coordBits);
  uint32_t const uy = DoubleToUint32(y, Bounds::kMinY, Bounds::kMaxY, coordBits);
  return {ux, uy};
}
/// Convenience overload: forwards the coordinates of |pt| to the (x, y) version.
m2::PointU PointDToPointU(m2::PointD const & pt, uint8_t coordBits)
{
  m2::PointU const encoded = PointDToPointU(pt.x, pt.y, coordBits);
  return encoded;
}
/// Encodes |pt| with |coordBits| bits per coordinate, relative to |limitRect|.
m2::PointU PointDToPointU(m2::PointD const & pt, uint8_t coordBits, m2::RectD const & limitRect)
{
  uint32_t const ux = DoubleToUint32(pt.x, limitRect.minX(), limitRect.maxX(), coordBits);
  uint32_t const uy = DoubleToUint32(pt.y, limitRect.minY(), limitRect.maxY(), coordBits);
  return {ux, uy};
}
/// Decodes a point encoded with |coordBits| bits per coordinate, relative to mercator::Bounds.
m2::PointD PointUToPointD(m2::PointU const & pt, uint8_t coordBits)
{
  using mercator::Bounds;

  double const dx = Uint32ToDouble(pt.x, Bounds::kMinX, Bounds::kMaxX, coordBits);
  double const dy = Uint32ToDouble(pt.y, Bounds::kMinY, Bounds::kMaxY, coordBits);
  return {dx, dy};
}
/// Decodes a point encoded with |coordBits| bits per coordinate, relative to |limitRect|.
m2::PointD PointUToPointD(m2::PointU const & pt, uint8_t coordBits, m2::RectD const & limitRect)
{
  double const dx = Uint32ToDouble(pt.x, limitRect.minX(), limitRect.maxX(), coordBits);
  double const dy = Uint32ToDouble(pt.y, limitRect.minY(), limitRect.maxY(), coordBits);
  return {dx, dy};
}
/// Finds the smallest number of bits (1..32) whose value range covers the larger
/// side of |limitRect| with the step |accuracy|.
/// @return The number of bits, or 0 if even 32 bits are not enough.
uint8_t GetCoordBits(m2::RectD const & limitRect, double accuracy)
{
  auto const range = std::max(limitRect.SizeX(), limitRect.SizeY());
  auto const valuesNumber = 1.0 + range / accuracy;

  uint8_t coordBits = 1;
  while (coordBits <= 32)
  {
    if (CoordSize(coordBits) >= valuesNumber)
      return coordBits;
    ++coordBits;
  }

  return 0;
}
// Obsolete functions ------------------------------------------------------------------------------
int64_t PointToInt64Obsolete(double x, double y, uint8_t coordBits)
{
int64_t const res = static_cast<int64_t>(PointUToUint64Obsolete(PointDToPointU(x, y, coordBits)));
ASSERT_GREATER_OR_EQUAL(res, 0, ("Highest bits of (ix, iy) are not used, so res should be > 0."));
ASSERT_LESS_OR_EQUAL(static_cast<uint64_t>(res), uint64_t{3} << 2 * kPointCoordBits, ());
return res;
}
/// Convenience overload: forwards the coordinates of |pt| to the (x, y) version.
int64_t PointToInt64Obsolete(m2::PointD const & pt, uint8_t coordBits)
{
  int64_t const encoded = PointToInt64Obsolete(pt.x, pt.y, coordBits);
  return encoded;
}
/// Inverse of PointToInt64Obsolete: unpacks |v| and converts the coordinates back to doubles.
m2::PointD Int64ToPointObsolete(int64_t v, uint8_t coordBits)
{
  ASSERT_GREATER_OR_EQUAL(v, 0, ("Highest bits of (ix, iy) are not used, so res should be > 0."));
  ASSERT_LESS_OR_EQUAL(static_cast<uint64_t>(v), uint64_t{3} << 2 * kPointCoordBits, ());

  m2::PointU const quantized = Uint64ToPointUObsolete(static_cast<uint64_t>(v));
  return PointUToPointD(quantized, coordBits);
}
/// Encodes the rect |r| as a pair: its (minX, minY) corner first, its (maxX, maxY) corner second.
std::pair<int64_t, int64_t> RectToInt64Obsolete(m2::RectD const & r, uint8_t coordBits)
{
  int64_t const minCorner = PointToInt64Obsolete(r.minX(), r.minY(), coordBits);
  int64_t const maxCorner = PointToInt64Obsolete(r.maxX(), r.maxY(), coordBits);
  return {minCorner, maxCorner};
}
/// Inverse of RectToInt64Obsolete: builds a rect from the two decoded corner points.
m2::RectD Int64ToRectObsolete(std::pair<int64_t, int64_t> const & p, uint8_t coordBits)
{
  auto const firstCorner = Int64ToPointObsolete(p.first, coordBits);
  auto const secondCorner = Int64ToPointObsolete(p.second, coordBits);
  return m2::RectD(firstCorner, secondCorner);
}
/// Packs both coordinates of |pt| into one 64-bit value with bits::BitwiseMerge.
/// An assertion verifies that Uint64ToPointUObsolete restores the original point.
uint64_t PointUToUint64Obsolete(m2::PointU const & pt)
{
  uint64_t const res = bits::BitwiseMerge(pt.x, pt.y);

  ASSERT_EQUAL(pt, Uint64ToPointUObsolete(res), ());
  return res;
}
/// Inverse of PointUToUint64Obsolete: splits |v| back into the two coordinates
/// with bits::BitwiseSplit.
m2::PointU Uint64ToPointUObsolete(int64_t v)
{
  m2::PointU unpacked;
  bits::BitwiseSplit(v, unpacked.x, unpacked.y);
  return unpacked;
}

View file

@ -0,0 +1,92 @@
#pragma once
#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
#include <cstdint>
#include <utility>
uint8_t constexpr kPointCoordBits = 30;
uint8_t constexpr kFeatureSorterPointCoordBits = 27;
// The absolute precision of the point encoding in the mwm files.
// If both x and y coordinates of two points lie within |kMwmPointAccuracy| of one
// another we consider the points equal. In other words, |kMwmPointAccuracy| may
// be used as the eps value for both x and y in Point::EqualDxDy, AlmostEqualAbs and such.
//
// The constant is loosely tied to mercator::Bounds::kRangeX / (1 << kPointCoordBits):
// The range of possible values for point coordinates
// mercator::Bounds::kRangeX = 360.0
// The number of distinct values for each coordinate after encoding
// (1 << kPointCoordBits) = 1073741824 ≈ 1e9
// Distance between two discernible points in the uniform case
// 360.0 / 1e9 ≈ 4e-7 ≈ 0.04 * |kMwmPointAccuracy|.
//
// On the other hand, this should be enough for most purposes because
// 1e-5 difference in the coordinates of a mercator-projected point corresponds to roughly
// 1 meter difference on the equator and we do not expect most OSM points to be mapped
// with better precision.
//
// todo(@m) By this argument, it seems that 1e-6 is a better choice.
//
// Note. generator/feature_sorter.cpp uses |kFeatureSorterPointCoordBits|,
// effectively overriding |kPointCoordBits|. Presumably it does so to guarantee a maximum of
// 4 bytes in the varint encoding, (27+1 sign(?) bit) / 7 = 4.
// todo(@m) Clarify how kPointCoordBits and kFeatureSorterPointCoordBits are related.
//
// Absolute per-coordinate tolerance (eps) for comparing points stored in mwm files; see the discussion above.
double constexpr kMwmPointAccuracy = 1e-5;
// Conversions between double coordinates and unsigned integer coordinates that use |coordBits| bits.
// Only the declarations are visible in this header, so the notes below are inferred from the signatures.
//
// Scalar versions.
// NOTE(review): presumably |x| is taken from the range [min, max] and quantized to a
// |coordBits|-bit integer (and back) — confirm against the implementation.
uint32_t DoubleToUint32(double x, double min, double max, uint8_t coordBits);
double Uint32ToDouble(uint32_t x, double min, double max, uint8_t coordBits);
// Point versions: double point -> integer point.
// NOTE(review): the overload with |limitRect| presumably uses that rect as the coordinate range,
// the other overloads some default bounds — confirm against the implementation.
m2::PointU PointDToPointU(double x, double y, uint8_t coordBits);
m2::PointU PointDToPointU(m2::PointD const & pt, uint8_t coordBits);
m2::PointU PointDToPointU(m2::PointD const & pt, uint8_t coordBits, m2::RectD const & limitRect);
// Point versions: integer point -> double point (same remark about |limitRect| applies).
m2::PointD PointUToPointD(m2::PointU const & p, uint8_t coordBits);
m2::PointD PointUToPointD(m2::PointU const & pt, uint8_t coordBits, m2::RectD const & limitRect);
// Returns coordBits needed to encode point from given rect with given absolute precision.
// If 32 bits are not enough returns 0. It's caller's responsibility to check it.
uint8_t GetCoordBits(m2::RectD const & limitRect, double accuracy);
// All functions below are deprecated and are left
// only for backward compatibility.
//
// Their intention was to store a point with unsigned 32-bit integer
// coordinates to a signed or to an unsigned 64-bit integer by interleaving the
// bits of the point's coordinates.
//
// A possible reason for interleaving is to lower the number of bytes
// needed by the varint encoding, at least if the coordinates are of the
// same order of magnitude. However, this is hard to justify:
// 1. We have no reason to expect the coordinates to be of the same order.
// 2. If you need to serialize a point, doing it separately
// for each coordinate is almost always a better option.
// 3. If you need to temporarily store the point as an uint,
// you do not need the complexity of interleaving.
//
// By VNG: Well, for polys delta encoding WriteVarUint(BitwiseMerge(x, y)) is better than
// WriteVarUint(x) + WriteVarUint(y) by 15%. Check CitiesBoundaries_Compression test with World V0 vs V1.
//
// Another possible reason to interleave bits of x and y arises
// when implementing the Z-order curve but we have this
// written elsewhere (see geometry/cellid.hpp).
// Encodes a point given as doubles into a single signed 64-bit value using |coordBits| bits per coordinate.
// NOTE(review): the definitions are not visible from this header; the exact quantization is to be confirmed there.
int64_t PointToInt64Obsolete(double x, double y, uint8_t coordBits);
int64_t PointToInt64Obsolete(m2::PointD const & pt, uint8_t coordBits);
// Decodes a value produced by PointToInt64Obsolete() (presumably with the same |coordBits|) back into a point.
m2::PointD Int64ToPointObsolete(int64_t v, uint8_t coordBits);
// Encodes the (minX, minY) and (maxX, maxY) corners of |r| with PointToInt64Obsolete().
std::pair<int64_t, int64_t> RectToInt64Obsolete(m2::RectD const & r, uint8_t coordBits);
// Decodes both values of |p| with Int64ToPointObsolete() and builds a rect from the two points.
m2::RectD Int64ToRectObsolete(std::pair<int64_t, int64_t> const & p, uint8_t coordBits);
// Merges pt.x and pt.y into one 64-bit value via bits::BitwiseMerge().
uint64_t PointUToUint64Obsolete(m2::PointU const & pt);
// Splits |v| back into x and y via bits::BitwiseSplit(). Note that the parameter is a signed int64_t.
m2::PointU Uint64ToPointUObsolete(int64_t v);

View file

@ -0,0 +1,162 @@
#pragma once
#include "coding/varint.hpp"
#include "base/buffer_vector.hpp"
#include <algorithm>
#include <string>
#include <type_traits>
#include <vector>
namespace rw
{
// Unsigned integral overload: the value is forwarded to WriteVarUint().
template <class UInt, class Sink>
std::enable_if_t<std::is_integral_v<UInt> && std::is_unsigned_v<UInt>, void> Write(Sink & sink, UInt i)
{
  WriteVarUint(sink, i);
}
// Unsigned integral overload: |i| receives the result of ReadVarUint<T>().
template <class UInt, class Source>
std::enable_if_t<std::is_integral_v<UInt> && std::is_unsigned_v<UInt>, void> Read(Source & src, UInt & i)
{
  i = ReadVarUint<UInt>(src);
}
// Signed integral overload: the value is forwarded to WriteVarInt().
template <class Int, class Sink>
std::enable_if_t<std::is_integral_v<Int> && std::is_signed_v<Int>, void> Write(Sink & sink, Int i)
{
  WriteVarInt(sink, i);
}
// Signed integral overload: |i| receives the result of ReadVarInt<T>().
template <class Int, class Source>
std::enable_if_t<std::is_integral_v<Int> && std::is_signed_v<Int>, void> Read(Source & src, Int & i)
{
  i = ReadVarInt<Int>(src);
}
// Writes a string as a varuint length (narrowed to uint32_t) followed by the raw characters.
// Nothing but the length is written for an empty string.
template <class Sink>
void Write(Sink & sink, std::string const & s)
{
  auto const count = static_cast<uint32_t>(s.size());
  WriteVarUint(sink, count);

  if (s.empty())
    return;

  sink.Write(s.data(), count);
}
// Reads a string stored as a varuint length followed by that many raw characters.
// |s| is resized to the stored length; the source is not touched again when the length is 0.
template <class Source>
void Read(Source & src, std::string & s)
{
  uint32_t const length = ReadVarUint<uint32_t>(src);
  s.resize(length);

  if (length == 0)
    return;

  src.Read(s.data(), length);
}
namespace impl
{
// Writes the element count as a varuint and then every element with Write().
// The count is narrowed to uint32_t and exactly that many elements are written.
template <class Sink, class Cont>
void WriteCont(Sink & sink, Cont const & v)
{
  auto const count = static_cast<uint32_t>(v.size());
  WriteVarUint(sink, count);

  for (uint32_t idx = 0; idx != count; ++idx)
    Write(sink, v[idx]);
}

// Reads the element count as a varuint, resizes |v| to it and fills every element with Read().
template <class Source, class Cont>
void ReadCont(Source & src, Cont & v)
{
  uint32_t const count = ReadVarUint<uint32_t>(src);
  v.resize(count);

  for (size_t idx = 0; idx != count; ++idx)
    Read(src, v[idx]);
}
}  // namespace impl
// std::vector overload: delegates to impl::WriteCont().
template <class Sink, class Elem>
void Write(Sink & sink, std::vector<Elem> const & v)
{
  impl::WriteCont(sink, v);
}
// std::vector overload: delegates to impl::ReadCont().
template <class Source, class Elem>
void Read(Source & src, std::vector<Elem> & v)
{
  impl::ReadCont(src, v);
}
// buffer_vector overload: delegates to impl::WriteCont().
template <class Sink, class Elem, size_t Capacity>
void Write(Sink & sink, buffer_vector<Elem, Capacity> const & v)
{
  impl::WriteCont(sink, v);
}
// buffer_vector overload: delegates to impl::ReadCont().
template <class Source, class Elem, size_t Capacity>
void Read(Source & src, buffer_vector<Elem, Capacity> & v)
{
  impl::ReadCont(src, v);
}
// Writes the object representation of |value| (sizeof(T) bytes) to |sink| as is.
// Only trivially copyable types are accepted.
template <class Sink, class T>
void WritePOD(Sink & sink, T const & value)
{
  static_assert(std::is_trivially_copyable_v<T>, "");

  sink.Write(&value, sizeof(value));
}
// Fills |value| with sizeof(T) raw bytes taken from |src|.
// Only trivially copyable types are accepted.
template <class Source, class T>
void ReadPOD(Source & src, T & value)
{
  static_assert(std::is_trivially_copyable_v<T>, "");

  src.Read(&value, sizeof(value));
}
// Reads a container that was stored as a varuint element count followed by the raw bytes
// of all elements (count * sizeof(value_type)).
// |v| is always resized to the stored count, so reading an empty container into a non-empty |v|
// clears it instead of leaving the previous elements in place (same contract as impl::ReadCont).
template <class TSource, class TCont>
void ReadVectorOfPOD(TSource & src, TCont & v)
{
  using ValueT = typename TCont::value_type;
  /// This assert fails on std::pair<int, int> and OsmID class because std::pair is not trivially copyable:
  /// std::pair has a non-trivial copy-assignment and move-assignment operator.
  // static_assert(std::is_trivially_copyable_v<ValueT>);

  uint32_t const count = ReadVarUint<uint32_t>(src);
  v.resize(count);

  // &v[0] is valid only for a non-empty container, so the raw read is skipped for count == 0.
  if (count > 0)
    src.Read(&v[0], count * sizeof(ValueT));
}
// Stores a container as a varuint element count (narrowed to uint32_t) followed by the raw bytes
// of its elements. Only the count is written for an empty container.
template <class Sink, class Cont>
void WriteVectorOfPOD(Sink & sink, Cont const & v)
{
  using ValueT = typename Cont::value_type;
  /// This assert fails on std::pair<int, int> and OsmID class because std::pair is not trivially copyable:
  /// std::pair has a non-trivial copy-assignment and move-assignment operator.
  // static_assert(std::is_trivially_copyable_v<ValueT>);

  auto const count = static_cast<uint32_t>(v.size());
  WriteVarUint(sink, count);

  if (count == 0)
    return;

  sink.Write(&v[0], count * sizeof(ValueT));
}
// Copies everything |reader| reports via Size() into |writer|, going through an intermediate
// buffer of at most |bufferSize| bytes.
// |reader| has to provide Size() and Read(void *, size_t); |writer| has to provide Write(void const *, size_t).
// A |bufferSize| of 0 is treated as 1, because a zero-sized chunk would never make progress.
template <class ReaderT, class WriterT>
void ReadAndWrite(ReaderT & reader, WriterT & writer, size_t bufferSize = 4 * 1024)
{
  uint64_t remaining = reader.Size();
  if (remaining == 0)
    return;

  // The chunk size is computed in 64 bits: casting the total size to size_t first could truncate it
  // (even to 0) on platforms with a 32-bit size_t, which would stall the loop below.
  uint64_t const chunkLimit = std::max<uint64_t>(bufferSize, 1);
  std::vector<char> buffer(static_cast<size_t>(std::min(chunkLimit, remaining)));

  while (remaining > 0)
  {
    // Never exceeds buffer.size(), so the cast to size_t is lossless.
    size_t const curr = static_cast<size_t>(std::min<uint64_t>(buffer.size(), remaining));
    reader.Read(buffer.data(), curr);
    writer.Write(buffer.data(), curr);
    remaining -= curr;
  }
}
} // namespace rw

8
libs/coding/reader.cpp Normal file
View file

@ -0,0 +1,8 @@
#include "coding/reader.hpp"
// Replaces the content of |s| with everything the reader holds: Size() bytes read from offset 0.
void Reader::ReadAsString(std::string & s) const
{
  s.clear();

  auto const byteCount = static_cast<size_t>(Size());
  s.resize(byteCount);

  Read(0 /* pos */, s.data(), byteCount);
}

283
libs/coding/reader.hpp Normal file
View file

@ -0,0 +1,283 @@
#pragma once
#include "coding/endianness.hpp"
#include "base/assert.hpp"
#include "base/exception.hpp"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
// Base class for random-access Reader. Not thread-safe.
// Implementations provide Size(), Read() and CreateSubReader(); ReadAsString() is built on top of them.
class Reader
{
public:
// Exception types used by readers.
// NOTE(review): DECLARE_EXCEPTION(name, base) presumably declares |name| as derived from |base|
// (see base/exception.hpp), which would make Reader::Exception the common base here — confirm.
DECLARE_EXCEPTION(Exception, RootException);
DECLARE_EXCEPTION(OpenException, Exception);
DECLARE_EXCEPTION(SizeException, Exception);
DECLARE_EXCEPTION(ReadException, Exception);
DECLARE_EXCEPTION(TooManyFilesException, Exception);
virtual ~Reader() = default;
// Number of bytes available through this reader.
virtual uint64_t Size() const = 0;
// Reads |size| bytes starting at offset |pos| into the buffer |p|.
virtual void Read(uint64_t pos, void * p, size_t size) const = 0;
// Creates a new reader for the sub-range described by |pos| and |size|.
virtual std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const = 0;
// Replaces |s| with the whole content of the reader, i.e. Size() bytes read from offset 0.
void ReadAsString(std::string & s) const;
};
// Reader from memory.
// The reader neither owns nor copies the data: it only stores the pointer passed to the constructor,
// so the underlying buffer has to stay valid while the reader (or a sub-reader made from it) is used.
// |WithExceptions| selects how an out-of-range request is reported: Reader::SizeException is thrown
// when it is true, an ASSERT is used otherwise.
template <bool WithExceptions>
class MemReaderTemplate : public Reader
{
public:
  // Construct from block of memory.
  MemReaderTemplate(void const * pData, size_t size) : m_pData(static_cast<char const *>(pData)), m_size(size) {}

  explicit MemReaderTemplate(std::string_view data) : m_pData{data.data()}, m_size{data.size()} {}

  uint64_t Size() const override { return m_size; }

  // Copies |size| bytes starting at offset |pos| into |p| after validating the range.
  void Read(uint64_t pos, void * p, size_t size) const override
  {
    AssertPosAndSize(pos, size);
    memcpy(p, m_pData + pos, size);
  }

  // Returns a reader of the same type over the range [pos, pos + size) of this reader's data.
  MemReaderTemplate SubReader(uint64_t pos, uint64_t size) const
  {
    AssertPosAndSize(pos, size);
    return MemReaderTemplate(m_pData + pos, static_cast<size_t>(size));
  }

  // Same as SubReader(), but the result is heap-allocated and returned through the base interface.
  std::unique_ptr<Reader> CreateSubReader(uint64_t pos, uint64_t size) const override
  {
    AssertPosAndSize(pos, size);
    return std::make_unique<MemReaderTemplate>(m_pData + pos, static_cast<size_t>(size));
  }

private:
  // Returns true when the range [pos, pos + size) lies inside the data.
  bool GoodPosAndSize(uint64_t pos, uint64_t size) const
  {
    // Deliberately not written as |pos + size <= Size()|: that sum wraps around for huge values
    // (e.g. pos == 2^64 - 1, size == 2) and would accept an out-of-range request.
    // Size() is backed by a size_t, so passing this check also guarantees that |size| fits into
    // size_t, which covers 32-bit systems where sizeof(size_t) == 4.
    uint64_t const total = Size();
    return pos <= total && size <= total - pos;
  }

  // Reports an invalid range according to |WithExceptions|.
  void AssertPosAndSize(uint64_t pos, uint64_t size) const
  {
    if constexpr (WithExceptions)
    {
      if (!GoodPosAndSize(pos, size))
        MYTHROW(Reader::SizeException, (pos, size, Size()));
    }
    else
    {
      ASSERT(GoodPosAndSize(pos, size), (pos, size, Size()));
    }
  }

  char const * m_pData;
  size_t m_size;
};

using MemReader = MemReaderTemplate<false>;
using MemReaderWithExceptions = MemReaderTemplate<true>;
// Handle that holds a polymorphic reader in a std::shared_ptr and forwards the reader calls to it.
// Common use: ReaderSource<ReaderPtr<Reader> >.
// Note! It takes the ownership of Reader.
template <class TReader>
class ReaderPtr
{
public:
  // Implicit on purpose: adopts the reader owned by |p|.
  template <typename TReaderDerived>
  ReaderPtr(std::unique_ptr<TReaderDerived> p) : m_p(std::move(p))
  {}

  uint64_t Size() const
  {
    return m_p->Size();
  }

  void Read(uint64_t pos, void * p, size_t size) const
  {
    m_p->Read(pos, p, size);
  }

  void ReadAsString(std::string & s) const
  {
    m_p->ReadAsString(s);
  }

  // Wraps the result of CreateSubReader() of the held reader into a new handle.
  ReaderPtr<Reader> SubReader(uint64_t pos, uint64_t size) const
  {
    return ReaderPtr<Reader>(m_p->CreateSubReader(pos, size));
  }

  // Non-owning access to the held reader.
  TReader * GetPtr() const
  {
    return m_p.get();
  }

protected:
  std::shared_ptr<TReader> m_p;
};
// Reader that additionally stores a name (file id) as a string.
class ModelReader : public Reader
{
public:
  explicit ModelReader(std::string const & name) : m_name(name) {}

  std::string const & GetName() const
  {
    return m_name;
  }

private:
  std::string m_name;
};
// ReaderPtr specialised for ModelReader: used for data files, gives access to the reader's name.
class ModelReaderPtr : public ReaderPtr<ModelReader>
{
  using TBase = ReaderPtr<ModelReader>;

public:
  // Implicit on purpose: adopts the reader owned by |p|.
  template <typename TReaderDerived>
  ModelReaderPtr(std::unique_ptr<TReaderDerived> p) : TBase(std::move(p))
  {}

  // Creates a sub-reader through the held reader and re-wraps it as a ModelReader.
  // The downcast is unchecked: the held reader is expected to create ModelReader-derived sub-readers.
  ModelReaderPtr SubReader(uint64_t pos, uint64_t size) const
  {
    std::unique_ptr<Reader> sub = m_p->CreateSubReader(pos, size);
    return std::unique_ptr<ModelReader>(static_cast<ModelReader *>(sub.release()));
  }

  std::string const & GetName() const
  {
    return m_p->GetName();
  }
};
/// Sequential source on top of a Reader that is held by non-owning reference.
/// The class is not a template, which allows hiding deserialization functions in a cpp file.
class NonOwningReaderSource
{
public:
  /// @note Reader shouldn't change its size during the source's lifetime.
  explicit NonOwningReaderSource(Reader const & reader) : NonOwningReaderSource(reader, 0, reader.Size()) {}

  /// Source over the range [pos, end) of |reader|.
  NonOwningReaderSource(Reader const & reader, uint64_t pos, uint64_t end) : m_reader(reader), m_pos(pos), m_end(end) {}

  /// Reads |size| bytes at the current position into |p| and moves the position forward.
  void Read(void * p, size_t size)
  {
    m_reader.Read(m_pos, p, size);
    Advance(size);
  }

  /// Moves the position forward by |size| bytes without reading.
  void Skip(uint64_t size)
  {
    Advance(size);
  }

  uint64_t Pos() const
  {
    return m_pos;
  }

  /// Number of bytes left between the current position and the end.
  uint64_t Size() const
  {
    CheckPosition();
    return m_end - m_pos;
  }

  void SetPosition(uint64_t pos)
  {
    m_pos = pos;
    CheckPosition();
  }

private:
  void Advance(uint64_t delta)
  {
    m_pos += delta;
    CheckPosition();
  }

  /// The position must never go past the end.
  void CheckPosition() const
  {
    ASSERT_LESS_OR_EQUAL(m_pos, m_end, ());
  }

  Reader const & m_reader;
  uint64_t m_pos;
  uint64_t m_end;
};
/// Sequential source on top of a reader that is stored by value (a copy of the constructor argument).
template <typename TReader>
class ReaderSource
{
public:
  using ReaderType = TReader;

  ReaderSource(TReader const & reader) : m_reader(reader), m_pos(0) {}

  /// Reads |size| bytes at the current position into |p| and moves the position forward.
  void Read(void * p, size_t size)
  {
    m_reader.Read(m_pos, p, size);
    m_pos += size;
  }

  /// Moves the position forward by |size| bytes without reading.
  void Skip(uint64_t size)
  {
    m_pos += size;
    ASSERT(AssertPosition(), ());
  }

  uint64_t Pos() const
  {
    return m_pos;
  }

  /// Number of bytes left between the current position and the end of the reader.
  uint64_t Size() const
  {
    ASSERT(AssertPosition(), ());
    return (m_reader.Size() - m_pos);
  }

  /// @todo We can avoid calling virtual Reader::SubReader and creating unique_ptr here
  /// by simply making "ReaderSource ReaderSource::SubSource(pos, end)" and storing "ReaderSource::m_end"
  /// like I did in NonOwningReaderSource. Unfortunately, it needs a lot of efforts in refactoring.
  /// @{
  /// Returns a sub-reader for the next |size| bytes and skips them in this source.
  TReader SubReader(uint64_t size)
  {
    uint64_t const start = m_pos;
    Skip(size);
    return m_reader.SubReader(start, size);
  }

  /// Sub-reader for everything that is left in this source.
  TReader SubReader()
  {
    return SubReader(Size());
  }

  /// Same as SubReader(size), but goes through CreateSubReader() of the stored reader.
  std::unique_ptr<Reader> CreateSubReader(uint64_t size)
  {
    uint64_t const start = m_pos;
    Skip(size);
    return m_reader.CreateSubReader(start, size);
  }

  std::unique_ptr<Reader> CreateSubReader()
  {
    return CreateSubReader(Size());
  }
  /// @}

private:
  /// Checks that the position is not past the end of the reader; the result is returned
  /// so that the call can be placed inside an ASSERT.
  bool AssertPosition() const
  {
    bool const ret = (m_pos <= m_reader.Size());
    ASSERT(ret, (m_pos, m_reader.Size()));
    return ret;
  }

  TReader m_reader;
  uint64_t m_pos;
};
// Reads |size| bytes at offset |pos| of |reader| into |p|; a thin wrapper over reader.Read().
template <class ReaderT>
inline void ReadFromPos(ReaderT const & reader, uint64_t pos, void * p, size_t size)
{
  reader.Read(pos, p, size);
}
// Reads sizeof(TPrimitive) bytes at offset |pos| of |reader| and returns them as a TPrimitive
// passed through SwapIfBigEndianMacroBased().
template <typename TPrimitive, class TReader>
inline TPrimitive ReadPrimitiveFromPos(TReader const & reader, uint64_t pos)
{
  static_assert(std::is_trivially_copyable_v<TPrimitive>);

  TPrimitive value;
  ReadFromPos(reader, pos, &value, sizeof(value));
  return SwapIfBigEndianMacroBased(value);
}
// Reads the next sizeof(TPrimitive) bytes from |source| and returns them as a TPrimitive
// passed through SwapIfBigEndianMacroBased().
template <typename TPrimitive, class TSource>
TPrimitive ReadPrimitiveFromSource(TSource & source)
{
  static_assert(std::is_trivially_copyable_v<TPrimitive>);

  TPrimitive value;
  source.Read(&value, sizeof(value));
  return SwapIfBigEndianMacroBased(value);
}
// Out-parameter flavour: stores the result of ReadPrimitiveFromSource<TPrimitive>(source) in |primitive|.
template <typename TPrimitive, typename TSource>
void ReadPrimitiveFromSource(TSource & source, TPrimitive & primitive)
{
  primitive = ReadPrimitiveFromSource<TPrimitive, TSource>(source);
}

Some files were not shown because too many files have changed in this diff Show more