Repo created
This commit is contained in:
parent
4af19165ec
commit
68073add76
12458 changed files with 12350765 additions and 2 deletions
17
libs/descriptions/CMakeLists.txt
Normal file
17
libs/descriptions/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Library that stores and loads per-feature text descriptions.
project(descriptions)

# Sources and headers that make up the library.
set(SRC
  header.hpp
  loader.cpp
  loader.hpp
  serdes.hpp
)

omim_add_library(${PROJECT_NAME} ${SRC})

# indexer is needed for MwmHandle; coding is linked right after it.
target_link_libraries(${PROJECT_NAME}
  indexer
  coding
)

omim_add_test_subdirectory(descriptions_tests)
|
||||
9
libs/descriptions/descriptions_tests/CMakeLists.txt
Normal file
9
libs/descriptions/descriptions_tests/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# Unit tests for the descriptions library.
project(descriptions_tests)

# Single translation unit holding all test cases.
set(SRC descriptions_tests.cpp)

omim_add_test(${PROJECT_NAME} ${SRC} NO_PLATFORM_INIT)

target_link_libraries(${PROJECT_NAME} descriptions)
|
||||
300
libs/descriptions/descriptions_tests/descriptions_tests.cpp
Normal file
300
libs/descriptions/descriptions_tests/descriptions_tests.cpp
Normal file
|
|
@ -0,0 +1,300 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "descriptions/serdes.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace descriptions;
|
||||
|
||||
struct RawDescription
|
||||
{
|
||||
FeatureIndex m_idx;
|
||||
std::vector<std::pair<LangCode, std::string>> m_strings;
|
||||
};
|
||||
|
||||
template <typename Reader>
|
||||
std::string GetDescription(Reader & reader, FeatureIndex fid, std::vector<int8_t> const & langPriority)
|
||||
{
|
||||
Deserializer des;
|
||||
return des.Deserialize(reader, fid, langPriority);
|
||||
}
|
||||
|
||||
// Builds the collection consumed by Serializer from the test-side raw descriptions.
// Every translation text is appended to the shared string table and referenced from its
// feature by the position it got in that table.
DescriptionsCollection Convert(std::vector<RawDescription> const & rawDescriptions)
{
  DescriptionsCollection result;
  for (auto const & raw : rawDescriptions)
  {
    auto & feature = result.m_features.emplace_back();
    feature.m_ftIndex = raw.m_idx;

    for (auto const & [lang, text] : raw.m_strings)
    {
      // The index must be taken before the push: it is the slot the text is about to occupy.
      feature.m_strIndices.emplace_back(lang, result.m_strings.size());
      result.m_strings.push_back(text);
    }
  }
  return result;
}
|
||||
|
||||
// Serializes a small collection and checks language-priority lookups, first on a section
// that fills the whole buffer and then on a section embedded between dummy bytes.
UNIT_TEST(Descriptions_SerDes)
{
  std::vector<RawDescription> const data = {
      {100, {{10, "Description of feature 100, language 10."}, {11, "Описание фичи 100, язык 11."}}},
      {101, {{11, "Описание фичи 101, язык 11."}}},
      {102, {{11, "Описание фичи 102, язык 11."}, {10, "Description of feature 102, language 10."}}}};

  // Shared expectations: the first available language in the priority list wins; an unknown
  // feature or a feature without any of the requested languages yields an empty string.
  auto const checkLookups = [](MemReader & reader)
  {
    TEST_EQUAL(GetDescription(reader, 102, {11, 10}), "Описание фичи 102, язык 11.", ());
    TEST_EQUAL(GetDescription(reader, 100, {12, 10}), "Description of feature 100, language 10.", ());
    TEST_EQUAL(GetDescription(reader, 101, {12}), "", ());
    TEST_EQUAL(GetDescription(reader, 0, {10, 11}), "", ());
    TEST_EQUAL(GetDescription(reader, 102, {10}), "Description of feature 102, language 10.", ());
  };

  // Case 1: the section starts at offset 0 and spans the whole buffer.
  {
    std::vector<uint8_t> bytes;
    {
      Serializer serializer(Convert(data));
      MemWriter<std::vector<uint8_t>> sink(bytes);
      serializer.Serialize(sink);
    }

    MemReader wholeReader(bytes.data(), bytes.size());
    checkLookups(wholeReader);
  }

  // Case 2: the section is preceded and followed by dummy bytes, so all stored offsets
  // have to be relative to the section start rather than to the start of the buffer.
  {
    size_t const kPaddingSize = 100;
    std::vector<uint8_t> bytes(kPaddingSize);
    {
      Serializer serializer(Convert(data));
      MemWriter<std::vector<uint8_t>> sink(bytes);
      sink.Seek(kPaddingSize);
      serializer.Serialize(sink);

      std::vector<uint8_t> trailingPadding(kPaddingSize);
      sink.Write(trailingPadding.data(), trailingPadding.size());
    }

    MemReader wholeReader(bytes.data(), bytes.size());
    auto sectionReader = wholeReader.SubReader(kPaddingSize, bytes.size() - kPaddingSize);
    checkLookups(sectionReader);
  }
}
|
||||
|
||||
// Round-trips HTML snippets written in many languages and scripts through the serializer
// and checks that every single translation is read back unchanged.
UNIT_TEST(Descriptions_Html)
{
  std::vector<RawDescription> const data = {
      {100,
       {{1,
         "<div class=\"section\">\n"
         "<p lang=\"en\">Map data © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> contributors, <a href=\"http://opendatacommons.org/licenses/odbl/\">ODbL</a>.</p>\n"
         "</div>"},
        {2,
         "<div class=\"section\">\n"
         "<p lang=\"ru\">Картографические данные © участники <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a>, <a href=\"http://opendatacommons.org/licenses/odbl/\">ODbL</a>.</p>\n"
         "</div>"},
        {3,
         "<div class=\"section\">\n"
         "<p lang=\"vi\">Dữ liệu bản đồ © Cộng tác viên của <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a>, ODbL.</p>\n"
         "</div>"},
        {4,
         "<div class=\"section\">\n"
         "<p lang=\"tr\">Harita verileri © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> katkıları, ODbL.</p>\n"
         "</div>"},
        {5,
         "<div class=\"section\">\n"
         "<p lang=\"th\">ข้อมูลแผนที่ © ผู้มีส่วนร่วม <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> ODbL.</p>\n"
         "</div>"},
        {6,
         "<div class=\"section\">\n"
         "<p lang=\"sv\">Map data © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a>-bidragsgivare, ODbL.</p>\n"
         "</div>"},
        {7,
         "<div class=\"section\">\n"
         "<p lang=\"es\">Contribuidores de los datos de mapas ©"
         " <a href=\"https://www.openstreetmap.org/\">OpenStreetMap</a>, Licencia ODbL.</p>\n"
         "</div>"},
        {8,
         "<div class=\"section\">\n"
         "<p lang=\"pt\">Dados de mapas de contribuintes do ©"
         " <a href=\"https://www.openstreetmap.org/\">OpenStreetMap</a>, ODbL.</p>\n"
         "</div>"},
        {9,
         "<div class=\"section\">\n"
         "<p lang=\"pl\">Dane map © Współautorzy"
         " <a href=\"https://www.openstreetmap.org/\">OpenStreetMap</a>, ODbL.</p>\n"
         "</div>"},
        {10,
         "<div class=\"section\">\n"
         "<p lang=\"nb\">Kartdata © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> bidragsytere, ODbL.</p>\n"
         "</div>"},
        {11,
         "<div class=\"section\">\n"
         "<p lang=\"ko\">지도 데이터 © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> contributors, ODbL.</p>\n"
         "</div>"},
        {12,
         "<div class=\"section\">\n"
         "<p lang=\"ja\">地図データ © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a>貢献者、ODbL。</p>\n"
         "</div>"},
        {13,
         "<div class=\"section\">\n"
         "<p lang=\"it\">Dati delle mappe © Contenuti <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a>, ODbL.</p>\n"
         "</div>"},
        {14,
         "<div class=\"section\">\n"
         "<p lang=\"id\">Data Peta © Kontributor <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a>, ODbL.</p>\n"
         "</div>"},
        {15,
         "<div class=\"section\">\n"
         "<p lang=\"hu\">Térképadat © az <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> közreműködői, ODbL.</p>\n"
         "</div>"},
        {16,
         "<div class=\"section\">\n"
         "<p lang=\"de\">Kartendaten © <a href=\"http://www.opentreetmap.org/\">"
         "OpenStreetMap</a>-Mitwirkende ODbL.</p>\n"
         "</div>"},
        {17,
         "<div class=\"section\">\n"
         "<p lang=\"fr\">Données de la carte sous © des contributeurs d'"
         "<a href=\"https://www.openstreetmap.org/\">OpenStreetMap</a>, licence ODbL.</p>\n"
         "</div>"},
        {18,
         "<div class=\"section\">\n"
         "<p lang=\"fi\">Karttatiedot © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a>-avustajat, ODbL.</p>\n"
         "</div>"},
        {19,
         "<div class=\"section\">\n"
         "<p lang=\"nl\">Kaartgegevens © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> bjdragers, ODbL.</p>\n"
         "</div>"},
        {20,
         "<div class=\"section\">\n"
         "<p lang=\"cs\">Mapová data © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> přispěvatelé, ODbL.</p>\n"
         "</div>"},
        {21,
         "<div class=\"section\">\n"
         "<p lang=\"zh-Hans\">地图数据 © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> 贡献者, ODbL.</p>\n"
         "</div>"},
        {22,
         "<div class=\"section\">\n"
         "<p lang=\"zh-Hant\">地圖數據 © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> 貢獻者, ODbL.</p>\n"
         "</div>"},
        {23,
         "<div class=\"section\">\n"
         "<p lang=\"ar\">المساهمون في بيانات خريطة © <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a> و ODbL.</p>\n"
         "</div>"},
        {24,
         "<div class=\"section\">\n"
         "<p lang=\"uk\">Картографічні дані © учасники <a href=\"https://www.openstreetmap.org/\">"
         "OpenStreetMap</a>, ODbL.</p>"
         "</div>"}}},
      {101,
       {{1,
         "<p lang=\"en\"><strong>omaps.app</strong> would not be possible without the generous contributions of "
         "the following projects:</p>\n"},
        {2,
         "<p lang=\"ru\">Приложение <strong>omaps.app</strong> было бы невозможно без участия следующих"
         " проектов:</p>\n"},
        {3,
         "<p lang=\"vi\"><strong>omaps.app</strong> sẽ không thành hiện thực nếu không có sự đóng góp hào phóng"
         " từ các dự án sau:</p>\n"},
        {4,
         "<p lang=\"tr\"><strong>omaps.app</strong>, aşağıdaki projelerin cömert katkıları olmadan mümkün "
         "olmazdı:</p>\n"},
        {5,
         "<p lang=\"th\"><strong>omaps.app</strong> จะสำเร็จลุล่วงไม่ได้เลยหากปราศจากความเอื้อเฟื้อเพื่อการร่วมมือของโป"
         "รเจกต์ดังต่อไปนี้:</p>\n"},
        {6,
         "<p lang=\"sv\"><strong>omaps.app</strong> skulle inte vara möjlig utan följande projekts generösa"
         " bidrag:</p>\n"},
        {7,
         "<p lang=\"es\"><strong>omaps.app</strong> no sería posible sin las generosas aportaciones de los"
         " siguientes proyectos:</p>\n"},
        {8,
         "<p lang=\"pt\">O <strong>omaps.app</strong> não seria possível sem as contribuições generosas dos "
         "seguintes projetos:</p>\n"},
        {9,
         "<p lang=\"pl\">Aplikacja <strong>omaps.app</strong> nie powstałaby bez znaczącego wkładu ze strony "
         "twórców poniższych projektów:</p>\n"},
        {10,
         "<p lang=\"nb\"><strong>omaps.app</strong> ville ikke vært mulig uten de generøse bidragene fra "
         "følgende prosjekter:</p>\n"},
        {11, "<p lang=\"ko\"><strong>omaps.app</strong>는 다음 프로젝트의 아낌없는 기부없이 가능하지 않습니다:</p>\n"},
        {12, "<p lang=\"ja\"><strong>omaps.app</strong>は次のプロジェクトの手厚い貢献なしには不可能です:</p>\n"},
        {13,
         "<p lang=\"it\"><strong>omaps.app</strong> non sarebbe realizzabile senza il generoso contributo "
         "dei seguenti progetti:</p>\n"},
        {14,
         "<p lang=\"id\"><strong>omaps.app</strong> tidak mungkin tercipta tanpa kontribusi yang tulus dari "
         "proyek-proyek berikut ini:</p>\n"},
        {15,
         "<p lang=\"hu\">A <strong>omaps.app</strong> nem jöhetett volna létre az alábbi projektek nagylelkű "
         "közreműködése nélkül:</p>\n"},
        {16,
         "<p lang=\"de\"><strong>omaps.app</strong> wäre ohne die großzügigen Spenden der folgenden Projekte "
         "nicht möglich:</p>\n"},
        {17,
         "<p lang=\"fr\">L'existence de <strong>omaps.app</strong> serait impossible sans les généreuses "
         "contributions des projets suivants :</p>\n"},
        {18,
         "<p lang=\"fi\"><strong>omaps.app</strong> ei olisi mahdollinen ilman seuraavien projektien aulista "
         "tukea:</p>\n"},
        {19,
         "<p lang=\"nl\"><strong>omaps.app</strong> zou niet mogelijk zijn zonder de genereuze bijdragen voor "
         "de volgende projecten:</p>\n"},
        {20,
         "<p lang=\"cs\"><strong>omaps.app</strong> by nemohlo existovat bez štědrých přispění následujících "
         "projektů:</p>\n"},
        {21, "<p lang=\"zh-Hans\">沒有下面項目的慷慨貢獻,<strong>omaps.app</strong> 不可能出現:</p>\n"},
        {22, "<p lang=\"zh-Hant\">沒有下面項目的慷慨貢獻,<strong>omaps.app</strong> 不可能出現:</p>\n"},
        {23,
         "<p lang=\"ar\"> ما كان لـ <strong>maps.me"
         "</strong> أن تأتي للوجود بدون المساهمات العظيمة للمشاريع التالية:</p>"},
        {24, "<p lang=\"uk\"><strong>omaps.app</strong> був би неможливим без щедрої участі таких проектів:</p>"}}},
  };

  // Serialize the whole collection into an in-memory buffer.
  std::vector<uint8_t> serialized;
  {
    Serializer serializer(Convert(data));
    MemWriter<std::vector<uint8_t>> sink(serialized);
    serializer.Serialize(sink);
  }

  MemReader reader(serialized.data(), serialized.size());

  // Request each translation by its own language only, so every stored string is verified.
  for (auto const & feature : data)
  {
    for (auto const & entry : feature.m_strings)
      TEST_EQUAL(GetDescription(reader, feature.m_idx, {entry.first}), entry.second, ());
  }
}
|
||||
43
libs/descriptions/header.hpp
Normal file
43
libs/descriptions/header.hpp
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/serdes_binary_header.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace descriptions
|
||||
{
|
||||
struct HeaderV0
|
||||
{
|
||||
template <typename Visitor>
|
||||
void Visit(Visitor & visitor)
|
||||
{
|
||||
visitor(m_featuresOffset, "featuresOffset");
|
||||
visitor(m_langMetaOffset, "langMetaOffset");
|
||||
visitor(m_indexOffset, "indexOffset");
|
||||
visitor(m_stringsOffset, "stringsOffset");
|
||||
visitor(m_eosOffset, "eosOffset");
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
void Serialize(Sink & sink)
|
||||
{
|
||||
coding::binary::HeaderSerVisitor<Sink> visitor(sink);
|
||||
visitor(*this);
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
void Deserialize(Source & source)
|
||||
{
|
||||
coding::binary::HeaderDesVisitor<Source> visitor(source);
|
||||
visitor(*this);
|
||||
}
|
||||
|
||||
uint64_t m_featuresOffset = 0;
|
||||
uint64_t m_langMetaOffset = 0;
|
||||
uint64_t m_indexOffset = 0;
|
||||
uint64_t m_stringsOffset = 0;
|
||||
uint64_t m_eosOffset = 0; // End of section.
|
||||
};
|
||||
} // namespace descriptions
|
||||
36
libs/descriptions/loader.cpp
Normal file
36
libs/descriptions/loader.cpp
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#include "descriptions/loader.hpp"
|
||||
|
||||
#include "indexer/data_source.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include "defines.hpp"
|
||||
|
||||
namespace descriptions
|
||||
{
|
||||
std::string Loader::GetWikiDescription(FeatureID const & featureId, std::vector<int8_t> const & langPriority)
|
||||
{
|
||||
auto const handle = m_dataSource.GetMwmHandleById(featureId.m_mwmId);
|
||||
|
||||
if (!handle.IsAlive())
|
||||
return {};
|
||||
|
||||
auto const & value = *handle.GetValue();
|
||||
|
||||
if (!value.m_cont.IsExist(DESCRIPTIONS_FILE_TAG))
|
||||
return {};
|
||||
|
||||
EntryPtr entry;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
entry = m_deserializers.try_emplace(featureId.m_mwmId, std::make_shared<Entry>()).first->second;
|
||||
}
|
||||
|
||||
ASSERT(entry, ());
|
||||
|
||||
auto readerPtr = value.m_cont.GetReader(DESCRIPTIONS_FILE_TAG);
|
||||
|
||||
std::lock_guard<std::mutex> lock(entry->m_mutex);
|
||||
return entry->m_deserializer.Deserialize(*readerPtr.GetPtr(), featureId.m_index, langPriority);
|
||||
}
|
||||
} // namespace descriptions
|
||||
40
libs/descriptions/loader.hpp
Normal file
40
libs/descriptions/loader.hpp
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
#pragma once
|
||||
|
||||
#include "descriptions/serdes.hpp"
|
||||
|
||||
#include "indexer/feature_decl.hpp"
|
||||
#include "indexer/mwm_set.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class DataSource;
|
||||
|
||||
namespace descriptions
|
||||
{
|
||||
// *NOTE* This class IS thread-safe.
|
||||
class Loader
|
||||
{
|
||||
public:
|
||||
explicit Loader(DataSource const & dataSource) : m_dataSource(dataSource) {}
|
||||
|
||||
std::string GetWikiDescription(FeatureID const & featureId, std::vector<int8_t> const & langPriority);
|
||||
|
||||
private:
|
||||
struct Entry
|
||||
{
|
||||
std::mutex m_mutex;
|
||||
Deserializer m_deserializer;
|
||||
};
|
||||
|
||||
using EntryPtr = std::shared_ptr<Entry>;
|
||||
|
||||
DataSource const & m_dataSource;
|
||||
std::map<MwmSet::MwmId, EntryPtr> m_deserializers;
|
||||
std::mutex m_mutex;
|
||||
};
|
||||
} // namespace descriptions
|
||||
271
libs/descriptions/serdes.hpp
Normal file
271
libs/descriptions/serdes.hpp
Normal file
|
|
@ -0,0 +1,271 @@
|
|||
#pragma once
|
||||
|
||||
#include "descriptions/header.hpp"
|
||||
|
||||
#include "coding/dd_vector.hpp"
|
||||
#include "coding/text_storage.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/buffer_vector.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace descriptions
|
||||
{
|
||||
using FeatureIndex = uint32_t;
|
||||
using StringIndex = uint32_t;
|
||||
using LangCode = int8_t;
|
||||
using LangMeta = buffer_vector<std::pair<LangCode, StringIndex>, 8>;
|
||||
using LangMetaOffset = uint32_t;
|
||||
|
||||
enum class Version : uint8_t
|
||||
{
|
||||
V0 = 0,
|
||||
Latest = V0
|
||||
};
|
||||
|
||||
struct FeatureDescription
|
||||
{
|
||||
FeatureIndex m_ftIndex = 0;
|
||||
LangMeta m_strIndices;
|
||||
};
|
||||
|
||||
struct DescriptionsCollection
|
||||
{
|
||||
std::vector<FeatureDescription> m_features;
|
||||
std::vector<std::string> m_strings;
|
||||
|
||||
size_t GetFeaturesCount() const { return m_features.size(); }
|
||||
};
|
||||
|
||||
/// \brief
|
||||
/// Section name: "descriptions".
|
||||
/// Description: keeping text descriptions of features in different languages.
|
||||
/// Section tables:
|
||||
/// * version
|
||||
/// * header
|
||||
/// * sorted feature ids vector
|
||||
/// * vector of unordered maps with language codes and string indices of corresponding translations of a description
|
||||
/// * vector of maps offsets for each feature id (and one additional dummy offset in the end)
|
||||
/// * BWT-compressed strings grouped by language.
|
||||
class Serializer
|
||||
{
|
||||
public:
|
||||
/// \param descriptions A non-empty unsorted collection of feature descriptions.
|
||||
/// FeatureDescription::m_description must contain non-empty translations.
|
||||
explicit Serializer(DescriptionsCollection && descriptions) : m_collection(std::move(descriptions))
|
||||
{
|
||||
std::sort(m_collection.m_features.begin(), m_collection.m_features.end(),
|
||||
base::LessBy(&FeatureDescription::m_ftIndex));
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
void Serialize(Sink & sink)
|
||||
{
|
||||
WriteToSink(sink, static_cast<uint8_t>(Version::Latest));
|
||||
|
||||
auto const startPos = sink.Pos();
|
||||
|
||||
HeaderV0 header;
|
||||
header.Serialize(sink);
|
||||
|
||||
header.m_featuresOffset = sink.Pos() - startPos;
|
||||
SerializeFeaturesIndices(sink);
|
||||
|
||||
std::vector<LangMetaOffset> offsets;
|
||||
header.m_langMetaOffset = sink.Pos() - startPos;
|
||||
SerializeLangMetaCollection(sink, offsets);
|
||||
|
||||
header.m_indexOffset = sink.Pos() - startPos;
|
||||
SerializeLangMetaIndex(sink, offsets);
|
||||
|
||||
header.m_stringsOffset = sink.Pos() - startPos;
|
||||
SerializeStrings(sink);
|
||||
|
||||
header.m_eosOffset = sink.Pos() - startPos;
|
||||
sink.Seek(startPos);
|
||||
header.Serialize(sink);
|
||||
sink.Seek(startPos + header.m_eosOffset);
|
||||
}
|
||||
|
||||
// Serializes a vector of 32-bit sorted feature ids.
|
||||
template <typename Sink>
|
||||
void SerializeFeaturesIndices(Sink & sink)
|
||||
{
|
||||
for (auto const & index : m_collection.m_features)
|
||||
WriteToSink(sink, index.m_ftIndex);
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
void SerializeLangMetaCollection(Sink & sink, std::vector<LangMetaOffset> & offsets)
|
||||
{
|
||||
auto const startPos = sink.Pos();
|
||||
for (auto const & meta : m_collection.m_features)
|
||||
{
|
||||
offsets.push_back(static_cast<LangMetaOffset>(sink.Pos() - startPos));
|
||||
for (auto const & pair : meta.m_strIndices)
|
||||
{
|
||||
WriteToSink(sink, pair.first);
|
||||
WriteVarUint(sink, pair.second);
|
||||
}
|
||||
}
|
||||
offsets.push_back(static_cast<LangMetaOffset>(sink.Pos() - startPos));
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
void SerializeLangMetaIndex(Sink & sink, std::vector<LangMetaOffset> const & offsets)
|
||||
{
|
||||
for (auto const & offset : offsets)
|
||||
WriteToSink(sink, offset);
|
||||
}
|
||||
|
||||
// Serializes strings in a compressed storage with block access.
|
||||
template <typename Sink>
|
||||
void SerializeStrings(Sink & sink)
|
||||
{
|
||||
coding::BlockedTextStorageWriter<Sink> writer(sink, 200000 /* blockSize */);
|
||||
for (auto const & s : m_collection.m_strings)
|
||||
writer.Append(s);
|
||||
}
|
||||
|
||||
private:
|
||||
DescriptionsCollection m_collection;
|
||||
};
|
||||
|
||||
class Deserializer
|
||||
{
|
||||
public:
|
||||
using LangPriorities = std::vector<LangCode>;
|
||||
|
||||
template <typename Reader>
|
||||
std::string Deserialize(Reader & reader, FeatureIndex featureIndex, LangPriorities const & langPriority)
|
||||
{
|
||||
NonOwningReaderSource source(reader);
|
||||
auto const version = static_cast<Version>(ReadPrimitiveFromSource<uint8_t>(source));
|
||||
|
||||
auto subReader = reader.CreateSubReader(source.Pos(), source.Size());
|
||||
CHECK(subReader, ());
|
||||
CHECK(version == Version::V0, ());
|
||||
return DeserializeV0(*subReader, featureIndex, langPriority);
|
||||
}
|
||||
|
||||
template <typename Reader>
|
||||
std::string DeserializeV0(Reader & reader, FeatureIndex featureIndex, LangPriorities const & langPriority)
|
||||
{
|
||||
InitializeIfNeeded(reader);
|
||||
|
||||
LangMetaOffset startOffset = 0;
|
||||
LangMetaOffset endOffset = 0;
|
||||
{
|
||||
ReaderPtr<Reader> idsSubReader(CreateFeatureIndicesSubReader(reader));
|
||||
DDVector<FeatureIndex, ReaderPtr<Reader>> ids(idsSubReader);
|
||||
auto const it = std::lower_bound(ids.begin(), ids.end(), featureIndex);
|
||||
if (it == ids.end() || *it != featureIndex)
|
||||
return {};
|
||||
|
||||
auto const d = static_cast<uint32_t>(std::distance(ids.begin(), it));
|
||||
|
||||
ReaderPtr<Reader> ofsSubReader(CreateLangMetaOffsetsSubReader(reader));
|
||||
DDVector<LangMetaOffset, ReaderPtr<Reader>> ofs(ofsSubReader);
|
||||
CHECK_LESS(d, ofs.size(), ());
|
||||
CHECK_LESS(d + 1, ofs.size(), ());
|
||||
|
||||
startOffset = ofs[d];
|
||||
endOffset = ofs[d + 1];
|
||||
}
|
||||
|
||||
LangMeta langMeta;
|
||||
{
|
||||
auto langMetaSubReader = CreateLangMetaSubReader(reader, startOffset, endOffset);
|
||||
NonOwningReaderSource source(*langMetaSubReader);
|
||||
|
||||
while (source.Size() > 0)
|
||||
{
|
||||
auto const lang = ReadPrimitiveFromSource<LangCode>(source);
|
||||
auto const stringIndex = ReadVarUint<StringIndex>(source);
|
||||
langMeta.emplace_back(lang, stringIndex);
|
||||
}
|
||||
}
|
||||
|
||||
auto stringsSubReader = CreateStringsSubReader(reader);
|
||||
for (LangCode const lang : langPriority)
|
||||
{
|
||||
for (auto const & meta : langMeta)
|
||||
if (lang == meta.first)
|
||||
return m_stringsReader.ExtractString(*stringsSubReader, meta.second);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename Reader>
|
||||
std::unique_ptr<Reader> CreateFeatureIndicesSubReader(Reader & reader)
|
||||
{
|
||||
CHECK(m_initialized, ());
|
||||
|
||||
auto const pos = m_header.m_featuresOffset;
|
||||
CHECK_GREATER_OR_EQUAL(m_header.m_langMetaOffset, pos, ());
|
||||
auto const size = m_header.m_langMetaOffset - pos;
|
||||
return reader.CreateSubReader(pos, size);
|
||||
}
|
||||
|
||||
template <typename Reader>
|
||||
std::unique_ptr<Reader> CreateLangMetaOffsetsSubReader(Reader & reader)
|
||||
{
|
||||
CHECK(m_initialized, ());
|
||||
|
||||
auto const pos = m_header.m_indexOffset;
|
||||
CHECK_GREATER_OR_EQUAL(m_header.m_stringsOffset, pos, ());
|
||||
auto const size = m_header.m_stringsOffset - pos;
|
||||
return reader.CreateSubReader(pos, size);
|
||||
}
|
||||
|
||||
template <typename Reader>
|
||||
std::unique_ptr<Reader> CreateLangMetaSubReader(Reader & reader, LangMetaOffset startOffset, LangMetaOffset endOffset)
|
||||
{
|
||||
CHECK(m_initialized, ());
|
||||
|
||||
auto const pos = m_header.m_langMetaOffset + startOffset;
|
||||
CHECK_GREATER_OR_EQUAL(m_header.m_indexOffset, pos, ());
|
||||
auto const size = endOffset - startOffset;
|
||||
CHECK_GREATER_OR_EQUAL(m_header.m_indexOffset, pos + size, ());
|
||||
return reader.CreateSubReader(pos, size);
|
||||
}
|
||||
|
||||
template <typename Reader>
|
||||
std::unique_ptr<Reader> CreateStringsSubReader(Reader & reader)
|
||||
{
|
||||
CHECK(m_initialized, ());
|
||||
|
||||
auto const pos = m_header.m_stringsOffset;
|
||||
CHECK_GREATER_OR_EQUAL(m_header.m_eosOffset, pos, ());
|
||||
auto const size = m_header.m_eosOffset - pos;
|
||||
return reader.CreateSubReader(pos, size);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Reader>
|
||||
void InitializeIfNeeded(Reader & reader)
|
||||
{
|
||||
if (m_initialized)
|
||||
return;
|
||||
|
||||
{
|
||||
NonOwningReaderSource source(reader);
|
||||
m_header.Deserialize(source);
|
||||
}
|
||||
|
||||
m_initialized = true;
|
||||
}
|
||||
|
||||
bool m_initialized = false;
|
||||
HeaderV0 m_header;
|
||||
coding::BlockedTextStorageReader m_stringsReader;
|
||||
};
|
||||
} // namespace descriptions
|
||||
Loading…
Add table
Add a link
Reference in a new issue