Repo created

parent 4af19165ec
commit 68073add76
12458 changed files with 12350765 additions and 2 deletions
libs/search/CMakeLists.txt (new file, 188 lines)
@@ -0,0 +1,188 @@
project(search)
|
||||
|
||||
set(SRC
|
||||
algos.hpp
|
||||
approximate_string_match.cpp
|
||||
approximate_string_match.hpp
|
||||
base/inverted_list.hpp
|
||||
base/mem_search_index.hpp
|
||||
base/text_index/dictionary.hpp
|
||||
base/text_index/header.cpp
|
||||
base/text_index/header.hpp
|
||||
base/text_index/mem.cpp
|
||||
base/text_index/mem.hpp
|
||||
base/text_index/merger.cpp
|
||||
base/text_index/merger.hpp
|
||||
base/text_index/postings.hpp
|
||||
base/text_index/reader.hpp
|
||||
base/text_index/text_index.cpp
|
||||
base/text_index/text_index.hpp
|
||||
base/text_index/utils.hpp
|
||||
bookmarks/data.cpp
|
||||
bookmarks/data.hpp
|
||||
bookmarks/processor.cpp
|
||||
bookmarks/processor.hpp
|
||||
bookmarks/results.hpp
|
||||
bookmarks/types.cpp
|
||||
bookmarks/types.hpp
|
||||
cancel_exception.hpp
|
||||
categories_cache.cpp
|
||||
categories_cache.hpp
|
||||
categories_set.hpp
|
||||
cbv.cpp
|
||||
cbv.hpp
|
||||
cities_boundaries_table.cpp
|
||||
cities_boundaries_table.hpp
|
||||
city_finder.cpp
|
||||
city_finder.hpp
|
||||
common.hpp
|
||||
cuisine_filter.cpp
|
||||
cuisine_filter.hpp
|
||||
displayed_categories.cpp
|
||||
displayed_categories.hpp
|
||||
doc_vec.cpp
|
||||
doc_vec.hpp
|
||||
downloader_search_callback.cpp
|
||||
downloader_search_callback.hpp
|
||||
dummy_rank_table.cpp
|
||||
dummy_rank_table.hpp
|
||||
editor_delegate.cpp
|
||||
editor_delegate.hpp
|
||||
emitter.hpp
|
||||
engine.cpp
|
||||
engine.hpp
|
||||
feature_loader.cpp
|
||||
feature_loader.hpp
|
||||
feature_offset_match.hpp
|
||||
features_filter.cpp
|
||||
features_filter.hpp
|
||||
features_layer.cpp
|
||||
features_layer.hpp
|
||||
features_layer_matcher.cpp
|
||||
features_layer_matcher.hpp
|
||||
# Template functions implementation. Should be included when using.
|
||||
# features_layer_path_finder.cpp
|
||||
features_layer_path_finder.hpp
|
||||
filtering_params.hpp
|
||||
geocoder.cpp
|
||||
geocoder.hpp
|
||||
geocoder_context.cpp
|
||||
geocoder_context.hpp
|
||||
geocoder_locality.cpp
|
||||
geocoder_locality.hpp
|
||||
geometry_cache.cpp
|
||||
geometry_cache.hpp
|
||||
geometry_utils.cpp
|
||||
geometry_utils.hpp
|
||||
highlighting.cpp
|
||||
highlighting.hpp
|
||||
house_detector.cpp
|
||||
house_detector.hpp
|
||||
house_numbers_matcher.cpp
|
||||
house_numbers_matcher.hpp
|
||||
house_to_street_table.cpp
|
||||
house_to_street_table.hpp
|
||||
idf_map.cpp
|
||||
idf_map.hpp
|
||||
intermediate_result.cpp
|
||||
intermediate_result.hpp
|
||||
intersection_result.cpp
|
||||
intersection_result.hpp
|
||||
interval_set.hpp
|
||||
keyword_lang_matcher.cpp
|
||||
keyword_lang_matcher.hpp
|
||||
keyword_matcher.cpp
|
||||
keyword_matcher.hpp
|
||||
latlon_match.cpp
|
||||
latlon_match.hpp
|
||||
lazy_centers_table.cpp
|
||||
lazy_centers_table.hpp
|
||||
localities_source.cpp
|
||||
localities_source.hpp
|
||||
locality_finder.cpp
|
||||
locality_finder.hpp
|
||||
locality_scorer.cpp
|
||||
locality_scorer.hpp
|
||||
mode.cpp
|
||||
mode.hpp
|
||||
model.cpp
|
||||
model.hpp
|
||||
mwm_context.cpp
|
||||
mwm_context.hpp
|
||||
nested_rects_cache.cpp
|
||||
nested_rects_cache.hpp
|
||||
point_rect_matcher.hpp
|
||||
postcode_points.cpp
|
||||
postcode_points.hpp
|
||||
pre_ranker.cpp
|
||||
pre_ranker.hpp
|
||||
pre_ranking_info.cpp
|
||||
pre_ranking_info.hpp
|
||||
processor.cpp
|
||||
processor.hpp
|
||||
projection_on_street.cpp
|
||||
projection_on_street.hpp
|
||||
query_params.cpp
|
||||
query_params.hpp
|
||||
query_saver.cpp
|
||||
query_saver.hpp
|
||||
ranker.cpp
|
||||
ranker.hpp
|
||||
ranking_info.cpp
|
||||
ranking_info.hpp
|
||||
ranking_utils.cpp
|
||||
ranking_utils.hpp
|
||||
region_address_getter.cpp
|
||||
region_address_getter.hpp
|
||||
region_info_getter.cpp
|
||||
region_info_getter.hpp
|
||||
result.cpp
|
||||
result.hpp
|
||||
retrieval.cpp
|
||||
retrieval.hpp
|
||||
reverse_geocoder.cpp
|
||||
reverse_geocoder.hpp
|
||||
search_index_values.hpp
|
||||
search_params.cpp
|
||||
search_params.hpp
|
||||
search_trie.hpp
|
||||
segment_tree.cpp
|
||||
segment_tree.hpp
|
||||
stats_cache.hpp
|
||||
street_vicinity_loader.cpp
|
||||
street_vicinity_loader.hpp
|
||||
streets_matcher.cpp
|
||||
streets_matcher.hpp
|
||||
string_utils.cpp
|
||||
string_utils.hpp
|
||||
suggest.cpp
|
||||
suggest.hpp
|
||||
token_range.hpp
|
||||
token_slice.cpp
|
||||
token_slice.hpp
|
||||
tracer.cpp
|
||||
tracer.hpp
|
||||
types_skipper.cpp
|
||||
types_skipper.hpp
|
||||
utils.cpp
|
||||
utils.hpp
|
||||
utm_mgrs_coords_match.cpp
|
||||
utm_mgrs_coords_match.hpp
|
||||
)
|
||||
|
||||
omim_add_library(${PROJECT_NAME} ${SRC})
|
||||
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
editor
|
||||
storage
|
||||
ge0
|
||||
openlocationcode
|
||||
)
|
||||
|
||||
if(PLATFORM_DESKTOP)
|
||||
add_subdirectory(search_tests_support)
|
||||
add_subdirectory(search_quality)
|
||||
endif()
|
||||
|
||||
omim_add_test_subdirectory(search_tests)
|
||||
omim_add_test_subdirectory(search_integration_tests)
|
||||
libs/search/algos.hpp (new file, 80 lines)
@@ -0,0 +1,80 @@
#pragma once
|
||||
|
||||
#include "base/base.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace impl
|
||||
{
|
||||
struct LS
|
||||
{
|
||||
size_t prevDecreasePos, decreaseValue;
|
||||
size_t prevIncreasePos, increaseValue;
|
||||
|
||||
LS(size_t i)
|
||||
{
|
||||
prevDecreasePos = i;
|
||||
decreaseValue = 1;
|
||||
prevIncreasePos = i;
|
||||
increaseValue = 1;
|
||||
}
|
||||
};
|
||||
} // namespace impl
|
||||
|
||||
template <typename T, typename OutIter, typename Comp>
|
||||
void LongestSubsequence(std::vector<T> const & in, OutIter out, Comp cmp)
|
||||
{
|
||||
if (in.empty())
|
||||
return;
|
||||
|
||||
std::vector<impl::LS> v;
|
||||
v.reserve(in.size());
|
||||
for (size_t i = 0; i < in.size(); ++i)
|
||||
v.push_back(impl::LS(i));
|
||||
|
||||
size_t res = 1;
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < v.size(); ++i)
|
||||
{
|
||||
for (size_t j = i + 1; j < v.size(); ++j)
|
||||
{
|
||||
if (cmp.Less(in[i], in[j]) && v[i].increaseValue + 1 >= v[j].increaseValue)
|
||||
{
|
||||
v[j].increaseValue = v[i].increaseValue + 1;
|
||||
v[j].prevIncreasePos = i;
|
||||
}
|
||||
|
||||
if (cmp.Greater(in[i], in[j]) && v[i].decreaseValue + 1 >= v[j].decreaseValue)
|
||||
{
|
||||
v[j].decreaseValue = v[i].decreaseValue + 1;
|
||||
v[j].prevDecreasePos = i;
|
||||
}
|
||||
|
||||
size_t const m = std::max(v[j].increaseValue, v[j].decreaseValue);
|
||||
if (m > res)
|
||||
{
|
||||
res = m;
|
||||
pos = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool increasing = true;
|
||||
if (v[pos].increaseValue < v[pos].decreaseValue)
|
||||
increasing = false;
|
||||
|
||||
while (res-- > 0)
|
||||
{
|
||||
*out++ = in[pos];
|
||||
|
||||
if (increasing)
|
||||
pos = v[pos].prevIncreasePos;
|
||||
else
|
||||
pos = v[pos].prevDecreasePos;
|
||||
}
|
||||
}
|
||||
} // namespace search
|
||||
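A usage sketch for LongestSubsequence (not part of the commit): the comparator only needs Less and Greater methods, and the subsequence is written to the output iterator from its last element to its first, so the result is usually reversed afterwards.

// Sketch: finds the longest increasing or decreasing run in |v| using
// search::LongestSubsequence from libs/search/algos.hpp.
#include "search/algos.hpp"

#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>

struct IntCmp
{
  bool Less(int a, int b) const { return a < b; }
  bool Greater(int a, int b) const { return a > b; }
};

int main()
{
  std::vector<int> const v = {3, 1, 4, 1, 5, 9, 2, 6};

  std::vector<int> res;
  search::LongestSubsequence(v, std::back_inserter(res), IntCmp{});
  // The elements are emitted from the end of the chain, so restore the order.
  std::reverse(res.begin(), res.end());

  for (int x : res)
    std::cout << x << ' ';  // a longest increasing subsequence of |v| (length 4)
  std::cout << std::endl;
}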
libs/search/approximate_string_match.cpp (new file, 46 lines)
@@ -0,0 +1,46 @@
#include "search/approximate_string_match.hpp"
|
||||
|
||||
// TODO: Create an error model.
// Take neighboring keys on the keyboard into account:
// 1. A neighboring key typed instead of the intended one.
// 2. A neighboring key typed before or after the intended one.
|
||||
|
||||
namespace search
|
||||
{
|
||||
using strings::UniChar;
|
||||
|
||||
uint32_t DefaultMatchCost::Cost10(UniChar) const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost01(UniChar) const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost11(UniChar, UniChar) const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost12(UniChar, UniChar const *) const
|
||||
{
|
||||
return 512;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost21(UniChar const *, UniChar) const
|
||||
{
|
||||
return 512;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost22(UniChar const *, UniChar const *) const
|
||||
{
|
||||
return 512;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::SwapCost(UniChar, UniChar) const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
} // namespace search
|
||||
libs/search/approximate_string_match.hpp (new file, 86 lines)
@@ -0,0 +1,86 @@
#pragma once
|
||||
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/base.hpp"
|
||||
#include "base/buffer_vector.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <queue>
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace impl
|
||||
{
|
||||
struct MatchCostData
|
||||
{
|
||||
uint32_t m_A, m_B;
|
||||
uint32_t m_Cost;
|
||||
|
||||
MatchCostData() : m_A(0), m_B(0), m_Cost(0) {}
|
||||
MatchCostData(uint32_t a, uint32_t b, uint32_t cost) : m_A(a), m_B(b), m_Cost(cost) {}
|
||||
|
||||
bool operator<(MatchCostData const & o) const { return m_Cost > o.m_Cost; }
|
||||
};
|
||||
|
||||
template <typename PriorityQueue>
|
||||
void PushMatchCost(PriorityQueue & q, uint32_t maxCost, uint32_t a, uint32_t b, uint32_t cost)
|
||||
{
|
||||
if (cost <= maxCost)
|
||||
q.push(MatchCostData(a, b, cost));
|
||||
}
|
||||
} // namespace impl
|
||||
|
||||
class DefaultMatchCost
|
||||
{
|
||||
public:
|
||||
uint32_t Cost10(strings::UniChar a) const;
|
||||
uint32_t Cost01(strings::UniChar b) const;
|
||||
uint32_t Cost11(strings::UniChar a, strings::UniChar b) const;
|
||||
uint32_t Cost12(strings::UniChar a, strings::UniChar const * pB) const;
|
||||
uint32_t Cost21(strings::UniChar const * pA, strings::UniChar b) const;
|
||||
uint32_t Cost22(strings::UniChar const * pA, strings::UniChar const * pB) const;
|
||||
uint32_t SwapCost(strings::UniChar a1, strings::UniChar a2) const;
|
||||
};
|
||||
|
||||
template <typename Char, typename CostFn>
|
||||
uint32_t StringMatchCost(Char const * sA, size_t sizeA, Char const * sB, size_t sizeB, CostFn const & costF,
|
||||
uint32_t maxCost, bool bPrefixMatch = false)
|
||||
{
|
||||
std::priority_queue<impl::MatchCostData, buffer_vector<impl::MatchCostData, 256>> q;
|
||||
q.push(impl::MatchCostData(0, 0, 0));
|
||||
while (!q.empty())
|
||||
{
|
||||
uint32_t a = q.top().m_A;
|
||||
uint32_t b = q.top().m_B;
|
||||
uint32_t const c = q.top().m_Cost;
|
||||
q.pop();
|
||||
while (a < sizeA && b < sizeB && sA[a] == sB[b])
|
||||
{
|
||||
++a;
|
||||
++b;
|
||||
}
|
||||
|
||||
if (a == sizeA && (bPrefixMatch || b == sizeB))
|
||||
return c;
|
||||
|
||||
if (a < sizeA)
|
||||
impl::PushMatchCost(q, maxCost, a + 1, b, c + costF.Cost10(sA[a]));
|
||||
if (b < sizeB)
|
||||
impl::PushMatchCost(q, maxCost, a, b + 1, c + costF.Cost01(sB[b]));
|
||||
if (a < sizeA && b < sizeB)
|
||||
impl::PushMatchCost(q, maxCost, a + 1, b + 1, c + costF.Cost11(sA[a], sB[b]));
|
||||
if (a + 1 < sizeA && b < sizeB)
|
||||
impl::PushMatchCost(q, maxCost, a + 2, b + 1, c + costF.Cost21(&sA[a], sB[b]));
|
||||
if (a < sizeA && b + 1 < sizeB)
|
||||
impl::PushMatchCost(q, maxCost, a + 1, b + 2, c + costF.Cost12(sA[a], &sB[b]));
|
||||
if (a + 1 < sizeA && b + 1 < sizeB)
|
||||
{
|
||||
impl::PushMatchCost(q, maxCost, a + 2, b + 2, c + costF.Cost22(&sA[a], &sB[b]));
|
||||
if (sA[a] == sB[b + 1] && sA[a + 1] == sB[b])
|
||||
impl::PushMatchCost(q, maxCost, a + 2, b + 2, c + costF.SwapCost(sA[a], sA[a + 1]));
|
||||
}
|
||||
}
|
||||
return maxCost + 1;
|
||||
}
|
||||
} // namespace search
|
||||
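A usage sketch for StringMatchCost with the default cost model (not part of the commit; the sample strings are made up, and strings::UniChar is assumed to come from base/string_utils.hpp). Every simple edit costs 256 and a split or join costs 512, so |maxCost| bounds how many errors are tolerated before the function gives up and returns maxCost + 1.

// Sketch: one substituted character ('v' vs 'w') costs Cost11 = 256.
#include "search/approximate_string_match.hpp"

#include "base/string_utils.hpp"

#include <cstdint>
#include <iostream>
#include <iterator>

int main()
{
  strings::UniChar const a[] = {'m', 'o', 's', 'k', 'v', 'a'};
  strings::UniChar const b[] = {'m', 'o', 's', 'k', 'w', 'a'};

  search::DefaultMatchCost const costs;
  uint32_t const maxCost = 3 * 256;  // tolerate up to three simple edits

  uint32_t const cost = search::StringMatchCost(a, std::size(a), b, std::size(b), costs, maxCost);
  std::cout << "cost = " << cost << std::endl;  // cost = 256
}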
libs/search/base/inverted_list.hpp (new file, 56 lines)
@@ -0,0 +1,56 @@
#pragma once
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
namespace search_base
|
||||
{
|
||||
// This class is supposed to be used in inverted index to store list
|
||||
// of document ids.
|
||||
template <typename Id>
|
||||
class InvertedList
|
||||
{
|
||||
public:
|
||||
using value_type = Id;
|
||||
using Value = Id;
|
||||
|
||||
bool Add(Id const & id)
|
||||
{
|
||||
auto it = std::lower_bound(m_ids.begin(), m_ids.end(), id);
|
||||
if (it != m_ids.end() && *it == id)
|
||||
return false;
|
||||
m_ids.insert(it, id);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Erase(Id const & id)
|
||||
{
|
||||
auto it = std::lower_bound(m_ids.begin(), m_ids.end(), id);
|
||||
if (it == m_ids.end() || *it != id)
|
||||
return false;
|
||||
m_ids.erase(it);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename ToDo>
|
||||
void ForEach(ToDo && toDo) const
|
||||
{
|
||||
for (auto const & id : m_ids)
|
||||
toDo(id);
|
||||
}
|
||||
|
||||
size_t Size() const { return m_ids.size(); }
|
||||
|
||||
bool Empty() const { return Size() == 0; }
|
||||
|
||||
void Clear() { m_ids.clear(); }
|
||||
|
||||
void Swap(InvertedList & rhs) { m_ids.swap(rhs.m_ids); }
|
||||
|
||||
private:
|
||||
std::vector<Id> m_ids;
|
||||
};
|
||||
} // namespace search_base
|
||||
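A usage sketch for InvertedList (not part of the commit): ids are kept sorted and duplicates are rejected.

#include "search/base/inverted_list.hpp"

#include <cstdint>
#include <iostream>

int main()
{
  search_base::InvertedList<uint32_t> list;
  list.Add(7);
  list.Add(3);
  list.Add(7);    // duplicate: returns false, nothing is stored
  list.Erase(5);  // not present: returns false

  list.ForEach([](uint32_t id) { std::cout << id << ' '; });  // prints: 3 7
  std::cout << "size = " << list.Size() << std::endl;         // size = 2
}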
libs/search/base/mem_search_index.hpp (new file, 97 lines)
@@ -0,0 +1,97 @@
#pragma once
|
||||
|
||||
#include "search/base/inverted_list.hpp"
|
||||
|
||||
#include "indexer/trie.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/mem_trie.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace search_base
|
||||
{
|
||||
template <typename Id>
|
||||
class MemSearchIndex
|
||||
{
|
||||
public:
|
||||
using Token = strings::UniString;
|
||||
using Char = Token::value_type;
|
||||
using List = InvertedList<Id>;
|
||||
using Trie = base::MemTrie<Token, List>;
|
||||
using Iterator = trie::MemTrieIterator<Token, List>;
|
||||
|
||||
template <typename Doc>
|
||||
void Add(Id const & id, Doc const & doc)
|
||||
{
|
||||
ForEachToken(id, doc, [&](Token const & token) { m_trie.Add(token, id); });
|
||||
}
|
||||
|
||||
template <typename Doc>
|
||||
void Erase(Id const & id, Doc const & doc)
|
||||
{
|
||||
ForEachToken(id, doc, [&](Token const & token) { m_trie.Erase(token, id); });
|
||||
}
|
||||
|
||||
Iterator GetRootIterator() const { return Iterator(m_trie.GetRootIterator()); }
|
||||
|
||||
std::vector<Id> GetAllIds() const
|
||||
{
|
||||
return WithIds([&](std::vector<Id> & ids)
|
||||
{ m_trie.ForEachInTrie([&](Token const & /* token */, Id const & id) { ids.push_back(id); }); });
|
||||
}
|
||||
|
||||
size_t GetNumDocs(int8_t lang, strings::UniString const & token, bool prefix) const
|
||||
{
|
||||
auto const key = AddLang(lang, token);
|
||||
|
||||
if (!prefix)
|
||||
{
|
||||
size_t numDocs = 0;
|
||||
m_trie.WithValuesHolder(key, [&](List const & list) { numDocs = list.Size(); });
|
||||
return numDocs;
|
||||
}
|
||||
|
||||
return WithIds([&](std::vector<Id> & ids)
|
||||
{
|
||||
m_trie.ForEachInSubtree(key, [&](Token const & /* token */, Id const & id) { ids.push_back(id); });
|
||||
}).size();
|
||||
}
|
||||
|
||||
private:
|
||||
static Token AddLang(int8_t lang, Token const & token)
|
||||
{
|
||||
Token r(1 + token.size());
|
||||
r[0] = static_cast<Char>(lang);
|
||||
std::copy(token.begin(), token.end(), r.begin() + 1);
|
||||
return r;
|
||||
}
|
||||
|
||||
template <typename Doc, typename Fn>
|
||||
void ForEachToken(Id const & /*id*/, Doc const & doc, Fn && fn)
|
||||
{
|
||||
doc.ForEachToken([&](int8_t lang, Token const & token)
|
||||
{
|
||||
if (lang >= 0)
|
||||
fn(AddLang(lang, token));
|
||||
});
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
static std::vector<Id> WithIds(Fn && fn)
|
||||
{
|
||||
std::vector<Id> ids;
|
||||
fn(ids);
|
||||
base::SortUnique(ids);
|
||||
return ids;
|
||||
}
|
||||
|
||||
Trie m_trie;
|
||||
};
|
||||
} // namespace search_base
|
||||
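A usage sketch for MemSearchIndex (not part of the commit). The document type below is a stand-in: the index only requires Doc::ForEachToken(fn(lang, token)); strings::MakeUniString from base/string_utils.hpp is assumed for building tokens.

#include "search/base/mem_search_index.hpp"

#include "base/string_utils.hpp"

#include <cstdint>
#include <string>
#include <vector>

struct Doc
{
  template <typename Fn>
  void ForEachToken(Fn && fn) const
  {
    for (auto const & t : m_tokens)
      fn(0 /* lang */, strings::MakeUniString(t));
  }

  std::vector<std::string> m_tokens;
};

int main()
{
  search_base::MemSearchIndex<uint32_t> index;
  index.Add(1 /* id */, Doc{{"hello", "world"}});
  index.Add(2 /* id */, Doc{{"hello"}});

  // Number of documents containing the full token "hello" in language 0;
  // GetAllIds() would return {1, 2} here.
  auto const n = index.GetNumDocs(0 /* lang */, strings::MakeUniString("hello"), false /* prefix */);
  return n == 2 ? 0 : 1;
}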
libs/search/base/text_index/dictionary.hpp (new file, 116 lines)
@@ -0,0 +1,116 @@
#pragma once
|
||||
|
||||
#include "search/base/text_index/header.hpp"
|
||||
#include "search/base/text_index/text_index.hpp"
|
||||
|
||||
#include "coding/write_to_sink.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/checked_cast.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace search_base
|
||||
{
|
||||
// The dictionary contains all tokens that are present
|
||||
// in the text index.
|
||||
class TextIndexDictionary
|
||||
{
|
||||
public:
|
||||
bool GetTokenId(Token const & token, size_t & id) const
|
||||
{
|
||||
auto const it = std::lower_bound(m_tokens.cbegin(), m_tokens.cend(), token);
|
||||
if (it == m_tokens.cend() || *it != token)
|
||||
return false;
|
||||
id = base::checked_cast<uint32_t>(std::distance(m_tokens.cbegin(), it));
|
||||
return true;
|
||||
}
|
||||
|
||||
void SetTokens(std::vector<Token> && tokens)
|
||||
{
|
||||
ASSERT(std::is_sorted(tokens.begin(), tokens.end()), ());
|
||||
m_tokens = std::move(tokens);
|
||||
}
|
||||
|
||||
std::vector<Token> const & GetTokens() const { return m_tokens; }
|
||||
|
||||
template <typename Sink>
|
||||
void Serialize(Sink & sink, TextIndexHeader & header, uint64_t startPos) const
|
||||
{
|
||||
header.m_numTokens = base::checked_cast<uint32_t>(m_tokens.size());
|
||||
|
||||
header.m_dictPositionsOffset = RelativePos(sink, startPos);
|
||||
// An uint32_t for each 32-bit offset and an uint32_t for the dummy entry at the end.
|
||||
WriteZeroesToSink(sink, sizeof(uint32_t) * (header.m_numTokens + 1));
|
||||
header.m_dictWordsOffset = RelativePos(sink, startPos);
|
||||
|
||||
std::vector<uint32_t> offsets;
|
||||
offsets.reserve(header.m_numTokens + 1);
|
||||
for (auto const & token : m_tokens)
|
||||
{
|
||||
offsets.emplace_back(RelativePos(sink, startPos));
|
||||
SerializeToken(sink, token);
|
||||
}
|
||||
offsets.emplace_back(RelativePos(sink, startPos));
|
||||
|
||||
{
|
||||
uint64_t const savedPos = sink.Pos();
|
||||
sink.Seek(startPos + header.m_dictPositionsOffset);
|
||||
|
||||
for (uint32_t const o : offsets)
|
||||
WriteToSink(sink, o);
|
||||
|
||||
CHECK_EQUAL(sink.Pos(), startPos + header.m_dictWordsOffset, ());
|
||||
sink.Seek(savedPos);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
void Deserialize(Source & source, TextIndexHeader const & header)
|
||||
{
|
||||
auto const startPos = source.Pos();
|
||||
|
||||
std::vector<uint32_t> tokenOffsets(header.m_numTokens + 1);
|
||||
for (uint32_t & offset : tokenOffsets)
|
||||
offset = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
|
||||
uint64_t const expectedSize = header.m_dictWordsOffset - header.m_dictPositionsOffset;
|
||||
CHECK_EQUAL(source.Pos(), startPos + expectedSize, ());
|
||||
m_tokens.resize(header.m_numTokens);
|
||||
for (size_t i = 0; i < m_tokens.size(); ++i)
|
||||
{
|
||||
size_t const size = base::checked_cast<size_t>(tokenOffsets[i + 1] - tokenOffsets[i]);
|
||||
DeserializeToken(source, m_tokens[i], size);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Sink>
|
||||
static void SerializeToken(Sink & sink, Token const & token)
|
||||
{
|
||||
CHECK(!token.empty(), ());
|
||||
// todo(@m) Endianness.
|
||||
sink.Write(token.data(), token.size() * sizeof(typename Token::value_type));
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
static void DeserializeToken(Source & source, Token & token, size_t size)
|
||||
{
|
||||
CHECK_GREATER(size, 0, ());
|
||||
ASSERT_EQUAL(size % sizeof(typename Token::value_type), 0, ());
|
||||
token.resize(size / sizeof(typename Token::value_type));
|
||||
source.Read(&token[0], size);
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
static uint32_t RelativePos(Sink & sink, uint64_t startPos)
|
||||
{
|
||||
return base::checked_cast<uint32_t>(sink.Pos() - startPos);
|
||||
}
|
||||
|
||||
std::vector<Token> m_tokens;
|
||||
};
|
||||
} // namespace search_base
|
||||
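A usage sketch for TextIndexDictionary (not part of the commit): SetTokens expects an already sorted token list, and GetTokenId returns the position of the token in that list.

#include "search/base/text_index/dictionary.hpp"

#include <cstddef>
#include <iostream>
#include <string>

int main()
{
  search_base::TextIndexDictionary dict;
  dict.SetTokens({"bar", "baz", "foo"});  // must be sorted

  size_t id = 0;
  if (dict.GetTokenId("baz", id))
    std::cout << "baz -> " << id << std::endl;  // baz -> 1
  if (!dict.GetTokenId("qux", id))
    std::cout << "qux is not in the dictionary" << std::endl;
}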
libs/search/base/text_index/header.cpp (new file, 9 lines)
@@ -0,0 +1,9 @@
#include "search/base/text_index/header.hpp"

using namespace std;

namespace search_base
{
// static
string const TextIndexHeader::kHeaderMagic = "mapsmetextidx";
} // namespace search_base
libs/search/base/text_index/header.hpp (new file, 57 lines)
@@ -0,0 +1,57 @@
|
||||
#pragma once
|
||||
|
||||
#include "search/base/text_index/text_index.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace search_base
|
||||
{
|
||||
struct TextIndexHeader
|
||||
{
|
||||
template <typename Sink>
|
||||
void Serialize(Sink & sink) const
|
||||
{
|
||||
CHECK_EQUAL(m_version, TextIndexVersion::V0, ());
|
||||
|
||||
sink.Write(kHeaderMagic.data(), kHeaderMagic.size());
|
||||
WriteToSink(sink, static_cast<uint8_t>(m_version));
|
||||
WriteToSink(sink, m_numTokens);
|
||||
WriteToSink(sink, m_dictPositionsOffset);
|
||||
WriteToSink(sink, m_dictWordsOffset);
|
||||
WriteToSink(sink, m_postingsStartsOffset);
|
||||
WriteToSink(sink, m_postingsListsOffset);
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
void Deserialize(Source & source)
|
||||
{
|
||||
CHECK_EQUAL(m_version, TextIndexVersion::V0, ());
|
||||
|
||||
std::string headerMagic(kHeaderMagic.size(), ' ');
|
||||
source.Read(&headerMagic[0], headerMagic.size());
|
||||
CHECK_EQUAL(headerMagic, kHeaderMagic, ());
|
||||
m_version = static_cast<TextIndexVersion>(ReadPrimitiveFromSource<uint8_t>(source));
|
||||
CHECK_EQUAL(m_version, TextIndexVersion::V0, ());
|
||||
m_numTokens = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
m_dictPositionsOffset = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
m_dictWordsOffset = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
m_postingsStartsOffset = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
m_postingsListsOffset = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
}
|
||||
|
||||
static std::string const kHeaderMagic;
|
||||
TextIndexVersion m_version = TextIndexVersion::Latest;
|
||||
uint32_t m_numTokens = 0;
|
||||
uint32_t m_dictPositionsOffset = 0;
|
||||
uint32_t m_dictWordsOffset = 0;
|
||||
uint32_t m_postingsStartsOffset = 0;
|
||||
uint32_t m_postingsListsOffset = 0;
|
||||
};
|
||||
} // namespace search_base
|
||||
libs/search/base/text_index/mem.cpp (new file, 34 lines)
@@ -0,0 +1,34 @@
#include "search/base/text_index/mem.hpp"

#include "base/stl_helpers.hpp"

using namespace std;

namespace search_base
{
void MemTextIndex::AddPosting(Token const & token, Posting const & posting)
{
  m_postingsByToken[token].emplace_back(posting);
}

void MemTextIndex::SortPostings()
{
  for (auto & entry : m_postingsByToken)
  {
    // A posting may occur several times in a document,
    // so we remove duplicates for the docid index.
    // If the count is needed for ranking it may be stored
    // separately.
    base::SortUnique(entry.second);
  }
}

void MemTextIndex::BuildDictionary()
{
  vector<Token> tokens;
  tokens.reserve(m_postingsByToken.size());
  for (auto const & entry : m_postingsByToken)
    tokens.emplace_back(entry.first);
  m_dictionary.SetTokens(std::move(tokens));
}
} // namespace search_base
libs/search/base/text_index/mem.hpp (new file, 167 lines)
@@ -0,0 +1,167 @@
#pragma once
|
||||
|
||||
#include "search/base/text_index/dictionary.hpp"
|
||||
#include "search/base/text_index/header.hpp"
|
||||
#include "search/base/text_index/postings.hpp"
|
||||
#include "search/base/text_index/text_index.hpp"
|
||||
#include "search/base/text_index/utils.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace search_base
|
||||
{
|
||||
class MemTextIndex
|
||||
{
|
||||
public:
|
||||
MemTextIndex() = default;
|
||||
|
||||
void AddPosting(Token const & token, Posting const & posting);
|
||||
|
||||
// Executes |fn| on every posting associated with |token|.
|
||||
// The order of postings is not specified.
|
||||
template <typename Fn>
|
||||
void ForEachPosting(Token const & token, Fn && fn) const
|
||||
{
|
||||
auto const it = m_postingsByToken.find(token);
|
||||
if (it == m_postingsByToken.end())
|
||||
return;
|
||||
for (auto const p : it->second)
|
||||
fn(p);
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void ForEachPosting(strings::UniString const & token, Fn && fn) const
|
||||
{
|
||||
ForEachPosting(strings::ToUtf8(token), std::forward<Fn>(fn));
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
void Serialize(Sink & sink)
|
||||
{
|
||||
SortPostings();
|
||||
BuildDictionary();
|
||||
|
||||
TextIndexHeader header;
|
||||
|
||||
uint64_t const startPos = sink.Pos();
|
||||
// Will be filled in later.
|
||||
header.Serialize(sink);
|
||||
|
||||
SerializeDictionary(sink, header, startPos);
|
||||
SerializePostingsLists(sink, header, startPos);
|
||||
|
||||
uint64_t const finishPos = sink.Pos();
|
||||
sink.Seek(startPos);
|
||||
header.Serialize(sink);
|
||||
sink.Seek(finishPos);
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
void Deserialize(Source & source)
|
||||
{
|
||||
uint64_t startPos = source.Pos();
|
||||
|
||||
TextIndexHeader header;
|
||||
header.Deserialize(source);
|
||||
|
||||
DeserializeDictionary(source, header, startPos);
|
||||
DeserializePostingsLists(source, header, startPos);
|
||||
}
|
||||
|
||||
private:
|
||||
class MemPostingsFetcher : public PostingsFetcher
|
||||
{
|
||||
public:
|
||||
explicit MemPostingsFetcher(std::map<Token, std::vector<Posting>> const & postingsByToken)
|
||||
: m_postingsByToken(postingsByToken)
|
||||
, m_it(m_postingsByToken.begin())
|
||||
{}
|
||||
|
||||
// PostingsFetcher overrides:
|
||||
bool IsValid() const override { return m_it != m_postingsByToken.end(); }
|
||||
|
||||
void Advance() override
|
||||
{
|
||||
if (m_it != m_postingsByToken.end())
|
||||
++m_it;
|
||||
}
|
||||
|
||||
void ForEachPosting(Fn const & fn) const override
|
||||
{
|
||||
CHECK(IsValid(), ());
|
||||
for (uint32_t p : m_it->second)
|
||||
fn(p);
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<Token, std::vector<Posting>> const & m_postingsByToken;
|
||||
// Iterator to the current token that will be processed when ForEachPosting is called.
|
||||
std::map<Token, std::vector<Posting>>::const_iterator m_it;
|
||||
};
|
||||
|
||||
void SortPostings();
|
||||
|
||||
void BuildDictionary();
|
||||
|
||||
template <typename Sink>
|
||||
void SerializeDictionary(Sink & sink, TextIndexHeader & header, uint64_t startPos) const
|
||||
{
|
||||
m_dictionary.Serialize(sink, header, startPos);
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
void DeserializeDictionary(Source & source, TextIndexHeader const & header, uint64_t startPos)
|
||||
{
|
||||
CHECK_EQUAL(source.Pos(), startPos + header.m_dictPositionsOffset, ());
|
||||
m_dictionary.Deserialize(source, header);
|
||||
}
|
||||
|
||||
template <typename Sink>
|
||||
void SerializePostingsLists(Sink & sink, TextIndexHeader & header, uint64_t startPos) const
|
||||
{
|
||||
MemPostingsFetcher fetcher(m_postingsByToken);
|
||||
WritePostings(sink, startPos, header, fetcher);
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
void DeserializePostingsLists(Source & source, TextIndexHeader const & header, uint64_t startPos)
|
||||
{
|
||||
CHECK_EQUAL(source.Pos(), startPos + header.m_postingsStartsOffset, ());
|
||||
std::vector<uint32_t> postingsStarts(header.m_numTokens + 1);
|
||||
for (uint32_t & start : postingsStarts)
|
||||
start = ReadPrimitiveFromSource<uint32_t>(source);
|
||||
|
||||
auto const & tokens = m_dictionary.GetTokens();
|
||||
CHECK_EQUAL(source.Pos(), startPos + header.m_postingsListsOffset, ());
|
||||
m_postingsByToken.clear();
|
||||
for (size_t i = 0; i < header.m_numTokens; ++i)
|
||||
{
|
||||
std::vector<uint32_t> postings;
|
||||
uint32_t last = 0;
|
||||
while (source.Pos() < startPos + postingsStarts[i + 1])
|
||||
{
|
||||
last += ReadVarUint<uint32_t>(source);
|
||||
postings.emplace_back(last);
|
||||
}
|
||||
CHECK_EQUAL(source.Pos(), postingsStarts[i + 1], ());
|
||||
|
||||
m_postingsByToken.emplace(tokens[i], postings);
|
||||
}
|
||||
}
|
||||
|
||||
std::map<Token, std::vector<Posting>> m_postingsByToken;
|
||||
TextIndexDictionary m_dictionary;
|
||||
};
|
||||
} // namespace search_base
|
||||
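A round-trip sketch for MemTextIndex (not part of the commit). MemWriter, MemReader and ReaderSource are assumed to come from the project's coding library; the tokens and docids are made up.

#include "search/base/text_index/mem.hpp"

#include "coding/reader.hpp"
#include "coding/writer.hpp"

#include <cstdint>
#include <vector>

int main()
{
  search_base::MemTextIndex index;
  index.AddPosting("london", 1 /* docid */);
  index.AddPosting("london", 5 /* docid */);
  index.AddPosting("paris", 2 /* docid */);

  // Serialize sorts the postings and builds the dictionary before writing.
  std::vector<uint8_t> buf;
  MemWriter<decltype(buf)> writer(buf);
  index.Serialize(writer);

  search_base::MemTextIndex restored;
  MemReader const reader(buf.data(), buf.size());
  ReaderSource<MemReader> source(reader);
  restored.Deserialize(source);

  uint32_t count = 0;
  restored.ForEachPosting("london", [&](uint32_t /* docid */) { ++count; });
  return count == 2 ? 0 : 1;
}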
libs/search/base/text_index/merger.cpp (new file, 126 lines)
@@ -0,0 +1,126 @@
#include "search/base/text_index/merger.hpp"
|
||||
|
||||
#include "search/base/text_index/dictionary.hpp"
|
||||
#include "search/base/text_index/header.hpp"
|
||||
#include "search/base/text_index/postings.hpp"
|
||||
|
||||
#include "coding/file_writer.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/logging.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
using namespace search_base;
|
||||
|
||||
class MergedPostingsListFetcher : public PostingsFetcher
|
||||
{
|
||||
public:
|
||||
MergedPostingsListFetcher(TextIndexDictionary const & dict, TextIndexReader const & index1,
|
||||
TextIndexReader const & index2)
|
||||
: m_dict(dict)
|
||||
, m_index1(index1)
|
||||
, m_index2(index2)
|
||||
{
|
||||
ReadPostings();
|
||||
}
|
||||
|
||||
// PostingsFetcher overrides:
|
||||
bool IsValid() const override
|
||||
{
|
||||
auto const & tokens = m_dict.GetTokens();
|
||||
CHECK_LESS_OR_EQUAL(m_tokenId, tokens.size(), ());
|
||||
return m_tokenId < tokens.size();
|
||||
}
|
||||
|
||||
void Advance() override
|
||||
{
|
||||
auto const & tokens = m_dict.GetTokens();
|
||||
CHECK_LESS_OR_EQUAL(m_tokenId, tokens.size(), ());
|
||||
if (m_tokenId == tokens.size())
|
||||
return;
|
||||
|
||||
++m_tokenId;
|
||||
ReadPostings();
|
||||
}
|
||||
|
||||
void ForEachPosting(Fn const & fn) const override
|
||||
{
|
||||
CHECK(IsValid(), ());
|
||||
for (uint32_t p : m_postings)
|
||||
fn(p);
|
||||
}
|
||||
|
||||
private:
|
||||
// Reads postings for the current token.
|
||||
void ReadPostings()
|
||||
{
|
||||
m_postings.clear();
|
||||
if (!IsValid())
|
||||
return;
|
||||
|
||||
auto const & tokens = m_dict.GetTokens();
|
||||
m_index1.ForEachPosting(tokens[m_tokenId], base::MakeBackInsertFunctor(m_postings));
|
||||
m_index2.ForEachPosting(tokens[m_tokenId], base::MakeBackInsertFunctor(m_postings));
|
||||
base::SortUnique(m_postings);
|
||||
}
|
||||
|
||||
TextIndexDictionary const & m_dict;
|
||||
TextIndexReader const & m_index1;
|
||||
TextIndexReader const & m_index2;
|
||||
// Index of the next token from |m_dict| to be processed.
|
||||
size_t m_tokenId = 0;
|
||||
vector<uint32_t> m_postings;
|
||||
};
|
||||
|
||||
TextIndexDictionary MergeDictionaries(TextIndexDictionary const & dict1, TextIndexDictionary const & dict2)
|
||||
{
|
||||
vector<Token> commonTokens;
|
||||
auto const & ts1 = dict1.GetTokens();
|
||||
auto const & ts2 = dict2.GetTokens();
|
||||
merge(ts1.begin(), ts1.end(), ts2.begin(), ts2.end(), back_inserter(commonTokens));
|
||||
ASSERT(is_sorted(commonTokens.begin(), commonTokens.end()), ());
|
||||
commonTokens.erase(unique(commonTokens.begin(), commonTokens.end()), commonTokens.end());
|
||||
|
||||
TextIndexDictionary dict;
|
||||
dict.SetTokens(std::move(commonTokens));
|
||||
return dict;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace search_base
|
||||
{
|
||||
// static
|
||||
void TextIndexMerger::Merge(TextIndexReader const & index1, TextIndexReader const & index2, FileWriter & sink)
|
||||
{
|
||||
TextIndexDictionary const dict = MergeDictionaries(index1.GetDictionary(), index2.GetDictionary());
|
||||
|
||||
TextIndexHeader header;
|
||||
|
||||
uint64_t const startPos = sink.Pos();
|
||||
// Will be filled in later.
|
||||
header.Serialize(sink);
|
||||
|
||||
dict.Serialize(sink, header, startPos);
|
||||
|
||||
MergedPostingsListFetcher fetcher(dict, index1, index2);
|
||||
WritePostings(sink, startPos, header, fetcher);
|
||||
|
||||
// Fill in the header.
|
||||
uint64_t const finishPos = sink.Pos();
|
||||
sink.Seek(startPos);
|
||||
header.Serialize(sink);
|
||||
sink.Seek(finishPos);
|
||||
}
|
||||
} // namespace search_base
|
||||
libs/search/base/text_index/merger.hpp (new file, 26 lines)
@@ -0,0 +1,26 @@
#pragma once

#include "search/base/text_index/reader.hpp"

class FileWriter;

namespace search_base
{
// Merges two on-disk text indexes and writes them to a new one.
class TextIndexMerger
{
public:
  // The merging process is as follows.
  // 1. Dictionaries from both indexes are read into memory, merged
  //    and written to disk.
  // 2. One uint32_t per entry is reserved in memory to calculate the
  //    offsets of the postings lists.
  // 3. One token at a time, all postings for the token are read from
  //    both indexes into memory, unified and written to disk.
  // 4. The offsets are written to disk.
  //
  // Note that the dictionary and offsets are kept in memory during the whole
  // merging process.
  static void Merge(TextIndexReader const & index1, TextIndexReader const & index2, FileWriter & sink);
};
} // namespace search_base
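A merge sketch (not part of the commit): two on-disk indexes are opened with TextIndexReader and merged into a third file. FileReader and FileWriter are assumed to come from the coding library; the file names are made up.

#include "search/base/text_index/merger.hpp"
#include "search/base/text_index/reader.hpp"

#include "coding/file_reader.hpp"
#include "coding/file_writer.hpp"

int main()
{
  search_base::TextIndexReader const index1{FileReader("index1.dat")};
  search_base::TextIndexReader const index2{FileReader("index2.dat")};

  FileWriter sink("merged.dat");
  search_base::TextIndexMerger::Merge(index1, index2, sink);
}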
libs/search/base/text_index/postings.hpp (new file, 88 lines)
@@ -0,0 +1,88 @@
#pragma once
|
||||
|
||||
#include "search/base/text_index/header.hpp"
|
||||
#include "search/base/text_index/text_index.hpp"
|
||||
#include "search/base/text_index/utils.hpp"
|
||||
|
||||
#include "coding/varint.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
namespace search_base
|
||||
{
|
||||
struct TextIndexHeader;
|
||||
|
||||
// A helper class that fetches the postings lists for
|
||||
// one token at a time. It is assumed that the tokens
|
||||
// are enumerated in the lexicographic order.
|
||||
class PostingsFetcher
|
||||
{
|
||||
public:
|
||||
using Fn = std::function<void(uint32_t)>;
|
||||
|
||||
virtual ~PostingsFetcher() = default;
|
||||
|
||||
// Returns true when there are tokens left in the fetcher and false otherwise.
|
||||
virtual bool IsValid() const = 0;
|
||||
|
||||
// Advances fetcher to the next token.
|
||||
virtual void Advance() = 0;
|
||||
|
||||
// Calls |fn| for every posting for the current token. Initially,
|
||||
// current token is the first token and then calls to Advance
|
||||
// may be used to process the next token until the underlying
|
||||
// source of the tokens is exhausted and the fetcher is no longer valid.
|
||||
virtual void ForEachPosting(Fn const & fn) const = 0;
|
||||
};
|
||||
|
||||
// Fetches the postings list one by one from |fetcher| and writes them
|
||||
// to |sink|, updating the fields in |header| that correspond to the
|
||||
// postings list.
|
||||
// |startPos| marks the start of the entire text index and is needed to compute
|
||||
// the offsets that are stored in |header|.
|
||||
template <typename Sink>
|
||||
void WritePostings(Sink & sink, uint64_t startPos, TextIndexHeader & header, PostingsFetcher & fetcher)
|
||||
{
|
||||
header.m_postingsStartsOffset = RelativePos(sink, startPos);
|
||||
// An uint32_t for each 32-bit offset and an uint32_t for the dummy entry at the end.
|
||||
WriteZeroesToSink(sink, sizeof(uint32_t) * (header.m_numTokens + 1));
|
||||
|
||||
header.m_postingsListsOffset = RelativePos(sink, startPos);
|
||||
|
||||
std::vector<uint32_t> postingsStarts;
|
||||
postingsStarts.reserve(header.m_numTokens);
|
||||
{
|
||||
uint32_t last;
|
||||
// todo(@m) s/uint32_t/Posting/ ?
|
||||
auto writePostings = [&](uint32_t p)
|
||||
{
|
||||
CHECK(last == 0 || last < p, (last, p));
|
||||
uint32_t const delta = p - last;
|
||||
WriteVarUint(sink, delta);
|
||||
last = p;
|
||||
};
|
||||
while (fetcher.IsValid())
|
||||
{
|
||||
postingsStarts.emplace_back(RelativePos(sink, startPos));
|
||||
last = 0;
|
||||
fetcher.ForEachPosting(writePostings);
|
||||
fetcher.Advance();
|
||||
}
|
||||
}
|
||||
// One more for convenience.
|
||||
postingsStarts.emplace_back(RelativePos(sink, startPos));
|
||||
|
||||
{
|
||||
uint64_t const savedPos = sink.Pos();
|
||||
sink.Seek(startPos + header.m_postingsStartsOffset);
|
||||
for (uint32_t const s : postingsStarts)
|
||||
WriteToSink(sink, s);
|
||||
|
||||
CHECK_EQUAL(sink.Pos(), startPos + header.m_postingsListsOffset, ());
|
||||
sink.Seek(savedPos);
|
||||
}
|
||||
}
|
||||
} // namespace search_base
|
||||
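A sketch of a custom PostingsFetcher driving WritePostings (not part of the commit). MemWriter is assumed to come from the coding library; the postings values are made up, and each list must be strictly increasing because WritePostings delta-encodes consecutive postings.

#include "search/base/text_index/header.hpp"
#include "search/base/text_index/postings.hpp"

#include "coding/writer.hpp"

#include <cstdint>
#include <vector>

class VectorFetcher : public search_base::PostingsFetcher
{
public:
  explicit VectorFetcher(std::vector<std::vector<uint32_t>> const & lists) : m_lists(lists) {}

  // PostingsFetcher overrides:
  bool IsValid() const override { return m_token < m_lists.size(); }
  void Advance() override { ++m_token; }
  void ForEachPosting(Fn const & fn) const override
  {
    for (uint32_t p : m_lists[m_token])
      fn(p);
  }

private:
  std::vector<std::vector<uint32_t>> const & m_lists;
  size_t m_token = 0;
};

int main()
{
  // Two tokens; {3, 17, 25} is stored as the varint-encoded deltas 3, 14, 8.
  std::vector<std::vector<uint32_t>> const lists = {{3, 17, 25}, {1, 2}};

  search_base::TextIndexHeader header;
  header.m_numTokens = static_cast<uint32_t>(lists.size());

  std::vector<uint8_t> buf;
  MemWriter<decltype(buf)> sink(buf);

  VectorFetcher fetcher(lists);
  search_base::WritePostings(sink, 0 /* startPos */, header, fetcher);
}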
libs/search/base/text_index/reader.hpp (new file, 78 lines)
@@ -0,0 +1,78 @@
#pragma once
|
||||
|
||||
#include "search/base/text_index/dictionary.hpp"
|
||||
#include "search/base/text_index/text_index.hpp"
|
||||
|
||||
#include "coding/file_reader.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace search_base
|
||||
{
|
||||
// A reader class for on-demand reading of postings lists from disk.
|
||||
class TextIndexReader
|
||||
{
|
||||
public:
|
||||
explicit TextIndexReader(FileReader const & fileReader) : m_fileReader(fileReader)
|
||||
{
|
||||
ReaderSource<FileReader> headerSource(m_fileReader);
|
||||
TextIndexHeader header;
|
||||
header.Deserialize(headerSource);
|
||||
|
||||
uint64_t const dictStart = header.m_dictPositionsOffset;
|
||||
uint64_t const dictEnd = header.m_postingsStartsOffset;
|
||||
ReaderSource<FileReader> dictSource(m_fileReader.SubReader(dictStart, dictEnd - dictStart));
|
||||
m_dictionary.Deserialize(dictSource, header);
|
||||
|
||||
uint64_t const postStart = header.m_postingsStartsOffset;
|
||||
uint64_t const postEnd = header.m_postingsListsOffset;
|
||||
ReaderSource<FileReader> postingsSource(m_fileReader.SubReader(postStart, postEnd - postStart));
|
||||
m_postingsStarts.resize(header.m_numTokens + 1);
|
||||
for (uint32_t & start : m_postingsStarts)
|
||||
start = ReadPrimitiveFromSource<uint32_t>(postingsSource);
|
||||
}
|
||||
|
||||
// Executes |fn| on every posting associated with |token|.
|
||||
// The order of postings is not specified.
|
||||
template <typename Fn>
|
||||
void ForEachPosting(Token const & token, Fn && fn) const
|
||||
{
|
||||
size_t tokenId = 0;
|
||||
if (!m_dictionary.GetTokenId(token, tokenId))
|
||||
return;
|
||||
CHECK_LESS(tokenId + 1, m_postingsStarts.size(), ());
|
||||
|
||||
ReaderSource<FileReader> source(
|
||||
m_fileReader.SubReader(m_postingsStarts[tokenId], m_postingsStarts[tokenId + 1] - m_postingsStarts[tokenId]));
|
||||
|
||||
uint32_t last = 0;
|
||||
while (source.Size() > 0)
|
||||
{
|
||||
last += ReadVarUint<uint32_t>(source);
|
||||
fn(last);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void ForEachPosting(strings::UniString const & token, Fn && fn) const
|
||||
{
|
||||
auto const utf8s = strings::ToUtf8(token);
|
||||
ForEachPosting(std::move(utf8s), std::forward<Fn>(fn));
|
||||
}
|
||||
|
||||
TextIndexDictionary const & GetDictionary() const { return m_dictionary; }
|
||||
|
||||
private:
|
||||
FileReader m_fileReader;
|
||||
TextIndexDictionary m_dictionary;
|
||||
std::vector<uint32_t> m_postingsStarts;
|
||||
};
|
||||
} // namespace search_base
|
||||
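A usage sketch for TextIndexReader (not part of the commit; the file name is made up): postings for a single token are decoded on demand from the sub-reader that covers that token's byte range.

#include "search/base/text_index/reader.hpp"

#include "coding/file_reader.hpp"

#include <cstdint>
#include <iostream>

int main()
{
  search_base::TextIndexReader const reader{FileReader("index.dat")};
  reader.ForEachPosting("london", [](uint32_t docid) { std::cout << docid << '\n'; });
}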
libs/search/base/text_index/text_index.cpp (new file, 20 lines)
@@ -0,0 +1,20 @@
#include "search/base/text_index/text_index.hpp"

#include "base/assert.hpp"
#include "base/string_utils.hpp"

using namespace std;

namespace search_base
{
string DebugPrint(TextIndexVersion const & version)
{
  switch (version)
  {
  case TextIndexVersion::V0: return "V0";
  }
  string ret = "Unknown TextIndexHeader version: " + strings::to_string(static_cast<uint8_t>(version));
  ASSERT(false, (ret));
  return ret;
}
} // namespace search_base
libs/search/base/text_index/text_index.hpp (new file, 42 lines)
@@ -0,0 +1,42 @@
#pragma once

#include <cstdint>
#include <string>

// This file contains the structures needed to store an
// updatable text index on disk.
//
// The index maps tokens of string type (typically std::string or
// strings::UniString) to postings lists, i.e. to lists of entities
// called postings that encode the locations of the strings in the collection
// of the text documents that is being indexed. An example of a posting
// is a document id (docid). Another example is a pair of a document id and
// a position within the corresponding document.
//
// The updates are performed by rebuilding the index, either as a result
// of merging several indexes together, or as a result of clearing outdated
// entries from an old index.
//
// For version 0, the postings lists are docid arrays, i.e. arrays of unsigned
// 32-bit integers stored in increasing order.
// The structure of the index is:
//   [header: version and offsets]
//   [array containing the starting positions of tokens]
//   [tokens, written without separators in the lexicographical order]
//   [array containing the offsets for the postings lists]
//   [postings lists, stored as delta-encoded varints]
//
// All offsets are measured relative to the start of the index.
namespace search_base
{
using Token = std::string;
using Posting = uint32_t;

enum class TextIndexVersion : uint8_t
{
  V0 = 0,
  Latest = V0
};

std::string DebugPrint(TextIndexVersion const & version);
} // namespace search_base
libs/search/base/text_index/utils.hpp (new file, 14 lines)
@@ -0,0 +1,14 @@
#pragma once

#include "base/checked_cast.hpp"

#include <cstdint>

namespace search_base
{
template <typename Sink>
uint32_t RelativePos(Sink & sink, uint64_t startPos)
{
  return base::checked_cast<uint32_t>(sink.Pos() - startPos);
}
} // namespace search_base
libs/search/bookmarks/data.cpp (new file, 20 lines)
@@ -0,0 +1,20 @@
#include "search/bookmarks/data.hpp"

#include <sstream>

using namespace std;

namespace search
{
namespace bookmarks
{
string DebugPrint(Data const & data)
{
  ostringstream os;
  os << "Data [";
  os << "names: " << ::DebugPrint(data.GetNames()) << ", ";
  os << "description: " << data.GetDescription() << "]";
  return os.str();
}
} // namespace bookmarks
} // namespace search
libs/search/bookmarks/data.hpp (new file, 90 lines)
@@ -0,0 +1,90 @@
#pragma once
|
||||
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "kml/types.hpp"
|
||||
|
||||
#include "coding/string_utf8_multilang.hpp"
|
||||
|
||||
#include "base/stl_helpers.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace bookmarks
|
||||
{
|
||||
// TODO (@m, @y): add more features for a bookmark here, i.e. address, center.
|
||||
class Data
|
||||
{
|
||||
public:
|
||||
Data() = default;
|
||||
|
||||
Data(kml::BookmarkData const & bookmarkData, std::string const & locale)
|
||||
: m_names(ExtractIndexableNames(bookmarkData, locale))
|
||||
, m_description(kml::GetDefaultStr(bookmarkData.m_description))
|
||||
{}
|
||||
|
||||
template <typename Fn>
|
||||
void ForEachNameToken(Fn && fn) const
|
||||
{
|
||||
auto withDefaultLang = [&](strings::UniString const & token)
|
||||
{
|
||||
// Note that the Default Language here is not the same as in the kml library.
|
||||
// Bookmark search by locale is not supported so every name is stored
|
||||
// in the default branch of the search trie.
|
||||
fn(StringUtf8Multilang::kDefaultCode, token);
|
||||
};
|
||||
|
||||
for (auto const & name : m_names)
|
||||
ForEachNormalizedToken(name, withDefaultLang);
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void ForEachDescriptionToken(Fn && fn) const
|
||||
{
|
||||
auto withDefaultLang = [&](strings::UniString const & token) { fn(StringUtf8Multilang::kDefaultCode, token); };
|
||||
|
||||
ForEachNormalizedToken(m_description, withDefaultLang);
|
||||
}
|
||||
|
||||
std::vector<std::string> const & GetNames() const { return m_names; }
|
||||
std::string const & GetDescription() const { return m_description; }
|
||||
|
||||
private:
|
||||
std::vector<std::string> ExtractIndexableNames(kml::BookmarkData const & bookmarkData, std::string const & locale)
|
||||
{
|
||||
std::vector<std::string> names;
|
||||
|
||||
// Same as GetPreferredBookmarkName from the map library. Duplicated here to avoid dependency.
|
||||
names.emplace_back(kml::GetPreferredBookmarkName(bookmarkData, locale));
|
||||
names.emplace_back(kml::GetPreferredBookmarkStr(bookmarkData.m_name, locale));
|
||||
|
||||
// todo(@m) Platform's API does not allow to use |locale| here.
|
||||
names.emplace_back(kml::GetLocalizedFeatureType(bookmarkData.m_featureTypes));
|
||||
|
||||
// Normalization is postponed. It is unlikely but we may still need original strings later.
|
||||
// Trimming seems harmless, though.
|
||||
for (auto & s : names)
|
||||
strings::Trim(s);
|
||||
|
||||
base::SortUnique(names);
|
||||
base::EraseIf(names, [](std::string const & s) { return s.empty(); });
|
||||
return names;
|
||||
}
|
||||
|
||||
// Names and custom names in all the locales that we are interested in.
|
||||
// The locale set is fixed at startup and the relevant names are provided
|
||||
// by the kml library. In case the user switches the device locale while
|
||||
// running the app, the UI will adapt; however the search will not, and the
|
||||
// bookmarks will not be reindexed. We consider this situation to be improbable
|
||||
// enough to justify not storing redundant names here.
|
||||
std::vector<std::string> m_names;
|
||||
std::string m_description;
|
||||
};
|
||||
|
||||
std::string DebugPrint(Data const & data);
|
||||
} // namespace bookmarks
|
||||
} // namespace search
|
||||
libs/search/bookmarks/processor.cpp (new file, 281 lines)
@@ -0,0 +1,281 @@
#include "search/bookmarks/processor.hpp"
|
||||
|
||||
#include "search/emitter.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/checked_cast.hpp"
|
||||
#include "base/dfa_helpers.hpp"
|
||||
#include "base/levenshtein_dfa.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace bookmarks
|
||||
{
|
||||
namespace
|
||||
{
|
||||
struct DocVecWrapper
|
||||
{
|
||||
explicit DocVecWrapper(DocVec const & dv) : m_dv(dv) {}
|
||||
|
||||
template <typename Fn>
|
||||
void ForEachToken(Fn && fn) const
|
||||
{
|
||||
for (size_t i = 0; i < m_dv.GetNumTokens(); ++i)
|
||||
fn(StringUtf8Multilang::kDefaultCode, m_dv.GetToken(i));
|
||||
}
|
||||
|
||||
DocVec const & m_dv;
|
||||
};
|
||||
|
||||
struct RankingInfo
|
||||
{
|
||||
bool operator<(RankingInfo const & rhs) const { return m_cosineSimilarity > rhs.m_cosineSimilarity; }
|
||||
|
||||
bool operator>(RankingInfo const & rhs) const { return rhs < *this; }
|
||||
|
||||
bool operator==(RankingInfo const & rhs) const { return !(*this < rhs) && !(*this > rhs); }
|
||||
bool operator!=(RankingInfo const & rhs) const { return !(*this == rhs); }
|
||||
|
||||
double m_cosineSimilarity = 0.0;
|
||||
};
|
||||
|
||||
struct IdInfoPair
|
||||
{
|
||||
IdInfoPair(Id const & id, RankingInfo const & info) : m_id(id), m_info(info) {}
|
||||
|
||||
bool operator<(IdInfoPair const & rhs) const
|
||||
{
|
||||
if (m_info != rhs.m_info)
|
||||
return m_info < rhs.m_info;
|
||||
return m_id < rhs.m_id;
|
||||
}
|
||||
|
||||
Id m_id;
|
||||
RankingInfo m_info;
|
||||
};
|
||||
|
||||
void FillRankingInfo(QueryVec & qv, IdfMap & idfs, DocVec const & dv, RankingInfo & info)
|
||||
{
|
||||
info.m_cosineSimilarity = qv.Similarity(idfs, dv);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
Processor::Processor(Emitter & emitter, base::Cancellable const & cancellable)
|
||||
: m_emitter(emitter)
|
||||
, m_cancellable(cancellable)
|
||||
{}
|
||||
|
||||
void Processor::Reset()
|
||||
{
|
||||
m_index = {};
|
||||
m_docs.clear();
|
||||
m_indexDescriptions = false;
|
||||
m_indexableGroups.clear();
|
||||
m_idToGroup.clear();
|
||||
m_bookmarksInGroup.clear();
|
||||
}
|
||||
|
||||
void Processor::EnableIndexingOfDescriptions(bool enable)
|
||||
{
|
||||
m_indexDescriptions = enable;
|
||||
}
|
||||
|
||||
void Processor::EnableIndexingOfBookmarkGroup(GroupId const & groupId, bool enable)
|
||||
{
|
||||
bool const wasIndexable = m_indexableGroups.count(groupId) > 0;
|
||||
if (enable)
|
||||
m_indexableGroups.insert(groupId);
|
||||
else
|
||||
m_indexableGroups.erase(groupId);
|
||||
bool const nowIndexable = m_indexableGroups.count(groupId) > 0;
|
||||
|
||||
if (wasIndexable == nowIndexable)
|
||||
return;
|
||||
|
||||
for (auto const & id : m_bookmarksInGroup[groupId])
|
||||
if (nowIndexable)
|
||||
AddToIndex(id);
|
||||
else
|
||||
EraseFromIndex(id);
|
||||
}
|
||||
|
||||
void Processor::Add(Id const & id, Doc const & doc)
|
||||
{
|
||||
ASSERT_EQUAL(m_docs.count(id), 0, ());
|
||||
|
||||
DocVec::Builder builder;
|
||||
doc.ForEachNameToken([&](int8_t /* lang */, strings::UniString const & token) { builder.Add(token); });
|
||||
|
||||
if (m_indexDescriptions)
|
||||
doc.ForEachDescriptionToken([&](int8_t /* lang */, strings::UniString const & token) { builder.Add(token); });
|
||||
|
||||
DocVec const docVec(builder);
|
||||
|
||||
m_docs[id] = docVec;
|
||||
}
|
||||
|
||||
void Processor::AddToIndex(Id const & id)
|
||||
{
|
||||
ASSERT_EQUAL(m_docs.count(id), 1, ());
|
||||
|
||||
m_index.Add(id, DocVecWrapper(m_docs[id]));
|
||||
}
|
||||
|
||||
void Processor::Update(Id const & id, Doc const & doc)
|
||||
{
|
||||
auto group = kInvalidGroupId;
|
||||
auto const groupIt = m_idToGroup.find(id);
|
||||
if (groupIt != m_idToGroup.end())
|
||||
{
|
||||
// A copy to avoid use-after-free.
|
||||
group = groupIt->second;
|
||||
DetachFromGroup(id, group);
|
||||
}
|
||||
|
||||
Erase(id);
|
||||
Add(id, doc);
|
||||
|
||||
if (group != kInvalidGroupId)
|
||||
AttachToGroup(id, group);
|
||||
}
|
||||
|
||||
void Processor::Erase(Id const & id)
|
||||
{
|
||||
ASSERT_EQUAL(m_docs.count(id), 1, ());
|
||||
|
||||
ASSERT(m_idToGroup.find(id) == m_idToGroup.end(),
|
||||
("A bookmark must be detached from all groups before being deleted."));
|
||||
|
||||
m_docs.erase(id);
|
||||
}
|
||||
|
||||
void Processor::EraseFromIndex(Id const & id)
|
||||
{
|
||||
ASSERT_EQUAL(m_docs.count(id), 1, ());
|
||||
|
||||
auto const & docVec = m_docs[id];
|
||||
m_index.Erase(id, DocVecWrapper(docVec));
|
||||
}
|
||||
|
||||
void Processor::AttachToGroup(Id const & id, GroupId const & group)
|
||||
{
|
||||
auto const it = m_idToGroup.find(id);
|
||||
if (it != m_idToGroup.end())
|
||||
LOG(LWARNING, ("Tried to attach bookmark", id, "to group", group, "but it already belongs to group", it->second));
|
||||
|
||||
m_idToGroup[id] = group;
|
||||
m_bookmarksInGroup[group].insert(id);
|
||||
if (m_indexableGroups.count(group) > 0)
|
||||
AddToIndex(id);
|
||||
}
|
||||
|
||||
void Processor::DetachFromGroup(Id const & id, GroupId const & group)
|
||||
{
|
||||
auto const it = m_idToGroup.find(id);
|
||||
if (it == m_idToGroup.end())
|
||||
{
|
||||
LOG(LWARNING, ("Tried to detach bookmark", id, "from group", group, "but it does not belong to any group"));
|
||||
return;
|
||||
}
|
||||
|
||||
if (it->second != group)
|
||||
{
|
||||
LOG(LWARNING, ("Tried to detach bookmark", id, "from group", group, "but it only belongs to group", it->second));
|
||||
return;
|
||||
}
|
||||
|
||||
m_idToGroup.erase(it);
|
||||
m_bookmarksInGroup[group].erase(id);
|
||||
|
||||
if (m_indexableGroups.count(group) > 0)
|
||||
EraseFromIndex(id);
|
||||
|
||||
auto const groupIt = m_bookmarksInGroup.find(group);
|
||||
CHECK(groupIt != m_bookmarksInGroup.end(), (group, m_bookmarksInGroup));
|
||||
if (groupIt->second.size() == 0)
|
||||
m_bookmarksInGroup.erase(groupIt);
|
||||
}
|
||||
|
||||
void Processor::Search(Params const & params) const
|
||||
{
|
||||
std::set<Id> ids;
|
||||
auto insertId = [&ids](Id const & id, bool /* exactMatch */) { ids.insert(id); };
|
||||
|
||||
for (size_t i = 0; i < params.GetNumTokens(); ++i)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
auto const & token = params.GetToken(i);
|
||||
if (params.IsPrefixToken(i))
|
||||
Retrieve<strings::PrefixDFAModifier<strings::LevenshteinDFA>>(token, insertId);
|
||||
else
|
||||
Retrieve<strings::LevenshteinDFA>(token, insertId);
|
||||
}
|
||||
|
||||
IdfMap idfs(*this, 1.0 /* unknownIdf */);
|
||||
auto qv = GetQueryVec(idfs, params);
|
||||
|
||||
std::vector<IdInfoPair> idInfos;
|
||||
for (auto const & id : ids)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
if (params.m_groupId != kInvalidGroupId)
|
||||
{
|
||||
auto const it = m_idToGroup.find(id);
|
||||
if (it == m_idToGroup.end() || it->second != params.m_groupId)
|
||||
continue;
|
||||
}
|
||||
|
||||
auto it = m_docs.find(id);
|
||||
CHECK(it != m_docs.end(), ("Can't find retrieved doc:", id));
|
||||
auto const & doc = it->second;
|
||||
|
||||
RankingInfo info;
|
||||
FillRankingInfo(qv, idfs, doc, info);
|
||||
|
||||
idInfos.emplace_back(id, info);
|
||||
}
|
||||
|
||||
BailIfCancelled();
|
||||
sort(idInfos.begin(), idInfos.end());
|
||||
|
||||
size_t numEmitted = 0;
|
||||
for (auto const & idInfo : idInfos)
|
||||
{
|
||||
if (numEmitted >= params.m_maxNumResults)
|
||||
break;
|
||||
m_emitter.AddBookmarkResult(bookmarks::Result(idInfo.m_id));
|
||||
++numEmitted;
|
||||
}
|
||||
}
|
||||
|
||||
void Processor::Finish(bool cancelled)
|
||||
{
|
||||
m_emitter.Finish(cancelled);
|
||||
}
|
||||
|
||||
uint64_t Processor::GetNumDocs(strings::UniString const & token, bool isPrefix) const
|
||||
{
|
||||
return base::asserted_cast<uint64_t>(m_index.GetNumDocs(StringUtf8Multilang::kDefaultCode, token, isPrefix));
|
||||
}
|
||||
|
||||
QueryVec Processor::GetQueryVec(IdfMap & idfs, QueryParams const & params) const
|
||||
{
|
||||
QueryVec::Builder builder;
|
||||
for (size_t i = 0; i < params.GetNumTokens(); ++i)
|
||||
{
|
||||
auto const & token = params.GetToken(i).GetOriginal();
|
||||
if (params.IsPrefixToken(i))
|
||||
builder.SetPrefix(token);
|
||||
else
|
||||
builder.AddFull(token);
|
||||
}
|
||||
return {idfs, builder};
|
||||
}
|
||||
} // namespace bookmarks
|
||||
} // namespace search
|
||||
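The Search() flow above (retrieve candidate ids per query token, filter by group, rank by tf-idf similarity, emit the top results) is driven by the public API declared in processor.hpp below. A minimal usage sketch, assuming an Emitter, a base::Cancellable and a Doc built from a bookmark's name are already available (none of that wiring is shown in this commit), and omitting how the query tokens would be filled into Params/QueryParams:

// Sketch only; |emitter|, |cancellable| and |doc| are assumed to exist.
search::bookmarks::Processor processor(emitter, cancellable);
search::bookmarks::Id const id = 1;
search::bookmarks::GroupId const group = 10;
processor.Add(id, doc);                             // register the bookmark, not indexed yet
processor.EnableIndexingOfBookmarkGroup(group, true /* enable */);
processor.AttachToGroup(id, group);                 // the group is indexable, so |id| gets indexed

search::bookmarks::Processor::Params params;        // query tokens omitted in this sketch
params.m_groupId = group;                           // restrict results to this group
params.m_maxNumResults = 10;
processor.Search(params);                           // matches are reported through the Emitter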
105
libs/search/bookmarks/processor.hpp
Normal file
@@ -0,0 +1,105 @@
#pragma once

#include "search/base/mem_search_index.hpp"
#include "search/bookmarks/types.hpp"
#include "search/cancel_exception.hpp"
#include "search/doc_vec.hpp"
#include "search/feature_offset_match.hpp"
#include "search/idf_map.hpp"
#include "search/query_params.hpp"
#include "search/search_params.hpp"
#include "search/utils.hpp"

#include <unordered_map>
#include <unordered_set>

namespace base
{
class Cancellable;
}

namespace search
{
class Emitter;

namespace bookmarks
{
class Processor : public IdfMap::Delegate
{
public:
  using Index = search_base::MemSearchIndex<Id>;

  struct Params : public QueryParams
  {
    // If valid, only show results for bookmarks attached to |m_groupId|.
    GroupId m_groupId = kInvalidGroupId;

    size_t m_maxNumResults = SearchParams::kDefaultNumResultsEverywhere;
  };

  Processor(Emitter & emitter, base::Cancellable const & cancellable);
  ~Processor() override = default;

  void Reset();

  // By default, only bookmark names are indexed. This method
  // should be used to enable or disable indexing bookmarks
  // by their descriptions.
  void EnableIndexingOfDescriptions(bool enable);

  void EnableIndexingOfBookmarkGroup(GroupId const & groupId, bool enable);

  // Adds a bookmark to the Processor but does not index it.
  void Add(Id const & id, Doc const & doc);
  // Indexes an already added bookmark.
  void AddToIndex(Id const & id);
  // Updates a bookmark with a new |doc|. Re-indexes if the bookmark
  // is already attached to an indexable group.
  void Update(Id const & id, Doc const & doc);

  void Erase(Id const & id);
  void EraseFromIndex(Id const & id);

  void AttachToGroup(Id const & id, GroupId const & group);
  void DetachFromGroup(Id const & id, GroupId const & group);

  void Search(Params const & params) const;

  void Finish(bool cancelled);

  // IdfMap::Delegate overrides:
  uint64_t GetNumDocs(strings::UniString const & token, bool isPrefix) const override;

private:
  void BailIfCancelled() const { ::search::BailIfCancelled(m_cancellable); }

  template <typename DFA, typename Fn>
  void Retrieve(QueryParams::Token const & token, Fn && fn) const
  {
    SearchTrieRequest<DFA> request;
    FillRequestFromToken(token, request);
    request.m_langs.insert(StringUtf8Multilang::kDefaultCode);

    MatchFeaturesInTrie(request, m_index.GetRootIterator(), [](Id const & /* id */) { return true; } /* filter */,
                        std::forward<Fn>(fn));
  }

  QueryVec GetQueryVec(IdfMap & idfs, QueryParams const & params) const;

  Emitter & m_emitter;
  base::Cancellable const & m_cancellable;

  Index m_index;
  std::unordered_map<Id, DocVec> m_docs;

  bool m_indexDescriptions = false;
  std::unordered_set<GroupId> m_indexableGroups;

  // Currently a bookmark can belong to at most one group,
  // but in the future it is possible for a single bookmark to be
  // attached to multiple groups.
  std::unordered_map<Id, GroupId> m_idToGroup;
  std::unordered_map<GroupId, std::unordered_set<Id>> m_bookmarksInGroup;
};
} // namespace bookmarks
} // namespace search
20
libs/search/bookmarks/results.hpp
Normal file
@@ -0,0 +1,20 @@
#pragma once

#include "search/bookmarks/types.hpp"

#include <vector>

namespace search
{
namespace bookmarks
{
struct Result
{
  explicit Result(Id id) : m_id(id) {}

  Id m_id = {};
};

using Results = std::vector<Result>;
} // namespace bookmarks
} // namespace search
9
libs/search/bookmarks/types.cpp
Normal file
@@ -0,0 +1,9 @@
#include "search/bookmarks/types.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace bookmarks
|
||||
{
|
||||
GroupId constexpr kInvalidGroupId = std::numeric_limits<GroupId>::max();
|
||||
} // namespace bookmarks
|
||||
} // namespace search
|
||||
19
libs/search/bookmarks/types.hpp
Normal file
@@ -0,0 +1,19 @@
#pragma once

#include "search/bookmarks/data.hpp"

#include <cstdint>
#include <limits>

namespace search
{
namespace bookmarks
{
// todo(@m) s/Id/DocId/g ?
using Id = uint64_t;
using GroupId = uint64_t;
using Doc = Data;

extern GroupId const kInvalidGroupId;
} // namespace bookmarks
} // namespace search
17
libs/search/cancel_exception.hpp
Normal file
@@ -0,0 +1,17 @@
#pragma once

#include "base/cancellable.hpp"
#include "base/exception.hpp"

namespace search
{
// This exception can be thrown from the deep darkness of search and
// geometry retrieval for fast cancellation of time-consuming tasks.
DECLARE_EXCEPTION(CancelException, RootException);

inline void BailIfCancelled(base::Cancellable const & cancellable)
{
  if (cancellable.IsCancelled())
    MYTHROW(CancelException, ("Cancelled"));
}
} // namespace search
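A brief sketch of how this helper is typically used; DoHeavyWork and the surrounding driver are illustrative names, not code from this commit. The long-running routine calls BailIfCancelled() periodically, and the outer driver catches CancelException:

// Illustrative only.
void DoHeavyWork(base::Cancellable const & cancellable)
{
  for (size_t i = 0; i < 1000000; ++i)
  {
    search::BailIfCancelled(cancellable);  // throws CancelException once cancellation is requested
    // ... process item i ...
  }
}

try
{
  DoHeavyWork(cancellable);
}
catch (search::CancelException const &)
{
  // The task was cancelled; partial results may still be emitted.
}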
84
libs/search/categories_cache.cpp
Normal file
@@ -0,0 +1,84 @@
#include "search/categories_cache.hpp"
|
||||
|
||||
#include "search/mwm_context.hpp"
|
||||
#include "search/retrieval.hpp"
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
// CategoriesCache ---------------------------------------------------------------------------------
|
||||
CBV CategoriesCache::Get(MwmContext const & context)
|
||||
{
|
||||
auto const id = context.m_handle.GetId();
|
||||
auto const it = m_cache.find(id);
|
||||
if (it != m_cache.cend())
|
||||
return it->second;
|
||||
|
||||
auto cbv = Load(context);
|
||||
m_cache[id] = cbv;
|
||||
return cbv;
|
||||
}
|
||||
|
||||
CBV CategoriesCache::Load(MwmContext const & context) const
|
||||
{
|
||||
auto const & c = classif();
|
||||
|
||||
// Any DFA will do, since we only use the request's m_categories,
|
||||
// but the interface of Retrieval forces us to make a choice.
|
||||
SearchTrieRequest<strings::UniStringDFA> request;
|
||||
|
||||
// m_categories usually has truncated types; add them together with their subtrees.
|
||||
m_categories.ForEach([&request, &c](uint32_t const type)
|
||||
{
|
||||
c.ForEachInSubtree([&](uint32_t descendantType)
|
||||
{ request.m_categories.emplace_back(FeatureTypeToString(c.GetIndexForType(descendantType))); }, type);
|
||||
});
|
||||
|
||||
Retrieval retrieval(context, m_cancellable);
|
||||
return retrieval.RetrieveAddressFeatures(request).m_features;
|
||||
}
|
||||
|
||||
// StreetsCache ------------------------------------------------------------------------------------
|
||||
StreetsCache::StreetsCache(base::Cancellable const & cancellable)
|
||||
: CategoriesCache(ftypes::IsStreetOrSquareChecker::Instance(), cancellable)
|
||||
{}
|
||||
|
||||
// SuburbsCache ------------------------------------------------------------------------------------
|
||||
SuburbsCache::SuburbsCache(base::Cancellable const & cancellable)
|
||||
: CategoriesCache(ftypes::IsSuburbChecker::Instance(), cancellable)
|
||||
{}
|
||||
// VillagesCache -----------------------------------------------------------------------------------
|
||||
VillagesCache::VillagesCache(base::Cancellable const & cancellable)
|
||||
: CategoriesCache(ftypes::IsVillageChecker::Instance(), cancellable)
|
||||
{}
|
||||
|
||||
// CountriesCache ----------------------------------------------------------------------------------
|
||||
CountriesCache::CountriesCache(base::Cancellable const & cancellable)
|
||||
: CategoriesCache(ftypes::IsCountryChecker::Instance(), cancellable)
|
||||
{}
|
||||
|
||||
// StatesCache -------------------------------------------------------------------------------------
|
||||
StatesCache::StatesCache(base::Cancellable const & cancellable)
|
||||
: CategoriesCache(ftypes::IsStateChecker::Instance(), cancellable)
|
||||
{}
|
||||
|
||||
// CitiesTownsOrVillagesCache ----------------------------------------------------------------------
|
||||
CitiesTownsOrVillagesCache::CitiesTownsOrVillagesCache(base::Cancellable const & cancellable)
|
||||
: CategoriesCache(ftypes::IsCityTownOrVillageChecker::Instance(), cancellable)
|
||||
{}
|
||||
|
||||
// HotelsCache -------------------------------------------------------------------------------------
|
||||
HotelsCache::HotelsCache(base::Cancellable const & cancellable)
|
||||
: CategoriesCache(ftypes::IsHotelChecker::Instance(), cancellable)
|
||||
{}
|
||||
|
||||
// FoodCache ---------------------------------------------------------------------------------------
|
||||
FoodCache::FoodCache(base::Cancellable const & cancellable)
|
||||
: CategoriesCache(ftypes::IsEatChecker::Instance(), cancellable)
|
||||
{}
|
||||
} // namespace search
|
||||
96
libs/search/categories_cache.hpp
Normal file
@@ -0,0 +1,96 @@
#pragma once
|
||||
|
||||
#include "search/categories_set.hpp"
|
||||
#include "search/cbv.hpp"
|
||||
|
||||
#include "indexer/mwm_set.hpp"
|
||||
|
||||
#include "base/cancellable.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace search
|
||||
{
|
||||
class MwmContext;
|
||||
|
||||
class CategoriesCache
|
||||
{
|
||||
public:
|
||||
template <typename TypesSource>
|
||||
CategoriesCache(TypesSource const & source, base::Cancellable const & cancellable) : m_cancellable(cancellable)
|
||||
{
|
||||
source.ForEachType([this](uint32_t type) { m_categories.Add(type); });
|
||||
}
|
||||
|
||||
CategoriesCache(std::vector<uint32_t> const & types, base::Cancellable const & cancellable)
|
||||
: m_cancellable(cancellable)
|
||||
{
|
||||
for (uint32_t type : types)
|
||||
m_categories.Add(type);
|
||||
}
|
||||
|
||||
virtual ~CategoriesCache() = default;
|
||||
|
||||
CBV Get(MwmContext const & context);
|
||||
|
||||
inline void Clear() { m_cache.clear(); }
|
||||
|
||||
private:
|
||||
CBV Load(MwmContext const & context) const;
|
||||
|
||||
CategoriesSet m_categories;
|
||||
base::Cancellable const & m_cancellable;
|
||||
std::map<MwmSet::MwmId, CBV> m_cache;
|
||||
};
|
||||
|
||||
class StreetsCache : public CategoriesCache
|
||||
{
|
||||
public:
|
||||
StreetsCache(base::Cancellable const & cancellable);
|
||||
};
|
||||
|
||||
class SuburbsCache : public CategoriesCache
|
||||
{
|
||||
public:
|
||||
SuburbsCache(base::Cancellable const & cancellable);
|
||||
};
|
||||
|
||||
class VillagesCache : public CategoriesCache
|
||||
{
|
||||
public:
|
||||
VillagesCache(base::Cancellable const & cancellable);
|
||||
};
|
||||
|
||||
class CountriesCache : public CategoriesCache
|
||||
{
|
||||
public:
|
||||
CountriesCache(base::Cancellable const & cancellable);
|
||||
};
|
||||
|
||||
class StatesCache : public CategoriesCache
|
||||
{
|
||||
public:
|
||||
StatesCache(base::Cancellable const & cancellable);
|
||||
};
|
||||
|
||||
// Used for cities/towns/villages from world. Currently we do not have villages in World.mwm but
|
||||
// it may be worth adding some important villages to it, e.g. mountain/beach resorts.
|
||||
class CitiesTownsOrVillagesCache : public CategoriesCache
|
||||
{
|
||||
public:
|
||||
CitiesTownsOrVillagesCache(base::Cancellable const & cancellable);
|
||||
};
|
||||
|
||||
class HotelsCache : public CategoriesCache
|
||||
{
|
||||
public:
|
||||
HotelsCache(base::Cancellable const & cancellable);
|
||||
};
|
||||
|
||||
class FoodCache : public CategoriesCache
|
||||
{
|
||||
public:
|
||||
FoodCache(base::Cancellable const & cancellable);
|
||||
};
|
||||
} // namespace search
|
||||
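A small usage sketch for the per-mwm caching above, assuming an MwmContext has already been constructed for the map being processed (that part is not shown here):

// Sketch only; |context| is an MwmContext obtained elsewhere.
base::Cancellable cancellable;
search::HotelsCache hotels(cancellable);
search::CBV const features = hotels.Get(context);  // computed once per mwm id, then served from the cache
features.ForEach([](uint64_t fid)
{
  // |fid| is the index of a hotel feature in this mwm.
});
hotels.Clear();  // drops the cached per-mwm results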
34
libs/search/categories_set.hpp
Normal file
@@ -0,0 +1,34 @@
#pragma once
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace search
|
||||
{
|
||||
class CategoriesSet
|
||||
{
|
||||
public:
|
||||
CategoriesSet() : m_classificator(classif()) {}
|
||||
|
||||
inline void Add(uint32_t type) { m_categories.insert(type); }
|
||||
|
||||
template <typename Fn>
|
||||
void ForEach(Fn && fn) const
|
||||
{
|
||||
std::for_each(m_categories.begin(), m_categories.end(), std::forward<Fn>(fn));
|
||||
}
|
||||
|
||||
private:
|
||||
Classificator const & m_classificator;
|
||||
std::unordered_set<uint32_t> m_categories;
|
||||
|
||||
DISALLOW_COPY_AND_MOVE(CategoriesSet);
|
||||
};
|
||||
} // namespace search
|
||||
126
libs/search/cbv.cpp
Normal file
@@ -0,0 +1,126 @@
#include "search/cbv.hpp"
|
||||
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace
|
||||
{
|
||||
uint64_t constexpr kModulo = 18446744073709551557LLU;
|
||||
} // namespace
|
||||
|
||||
// static
|
||||
CBV const & CBV::GetFull()
|
||||
{
|
||||
static CBV const fullCBV(true /*full*/);
|
||||
return fullCBV;
|
||||
}
|
||||
|
||||
CBV::CBV(unique_ptr<coding::CompressedBitVector> p) : m_p(std::move(p)) {}
|
||||
|
||||
CBV::CBV(CBV && cbv) : m_p(std::move(cbv.m_p)), m_isFull(cbv.m_isFull)
|
||||
{
|
||||
cbv.m_isFull = false;
|
||||
}
|
||||
|
||||
CBV::CBV(bool full) : m_isFull(full) {}
|
||||
|
||||
CBV & CBV::operator=(unique_ptr<coding::CompressedBitVector> p)
|
||||
{
|
||||
m_p = std::move(p);
|
||||
m_isFull = false;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
CBV & CBV::operator=(CBV && rhs)
|
||||
{
|
||||
if (this == &rhs)
|
||||
return *this;
|
||||
|
||||
m_p = std::move(rhs.m_p);
|
||||
m_isFull = rhs.m_isFull;
|
||||
|
||||
rhs.m_isFull = false;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
void CBV::SetFull()
|
||||
{
|
||||
m_p.Reset();
|
||||
m_isFull = true;
|
||||
}
|
||||
|
||||
void CBV::Reset()
|
||||
{
|
||||
m_p.Reset();
|
||||
m_isFull = false;
|
||||
}
|
||||
|
||||
bool CBV::HasBit(uint64_t id) const
|
||||
{
|
||||
if (IsFull())
|
||||
return true;
|
||||
if (IsEmpty())
|
||||
return false;
|
||||
return m_p->GetBit(id);
|
||||
}
|
||||
|
||||
uint64_t CBV::PopCount() const
|
||||
{
|
||||
ASSERT(!IsFull(), ());
|
||||
if (IsEmpty())
|
||||
return 0;
|
||||
return m_p->PopCount();
|
||||
}
|
||||
|
||||
CBV CBV::Union(CBV const & rhs) const
|
||||
{
|
||||
if (IsFull() || rhs.IsEmpty())
|
||||
return *this;
|
||||
if (IsEmpty() || rhs.IsFull())
|
||||
return rhs;
|
||||
return CBV(coding::CompressedBitVector::Union(*m_p, *rhs.m_p));
|
||||
}
|
||||
|
||||
CBV CBV::Intersect(CBV const & rhs) const
|
||||
{
|
||||
if (IsFull() || rhs.IsEmpty())
|
||||
return rhs;
|
||||
if (IsEmpty() || rhs.IsFull())
|
||||
return *this;
|
||||
return CBV(coding::CompressedBitVector::Intersect(*m_p, *rhs.m_p));
|
||||
}
|
||||
|
||||
CBV CBV::Take(uint64_t n) const
|
||||
{
|
||||
if (IsEmpty())
|
||||
return *this;
|
||||
if (IsFull())
|
||||
{
|
||||
vector<uint64_t> groups(static_cast<size_t>((n + 63) / 64), numeric_limits<uint64_t>::max());
|
||||
uint64_t const r = n % 64;
|
||||
if (r != 0)
|
||||
{
|
||||
ASSERT(!groups.empty(), ());
|
||||
groups.back() = (static_cast<uint64_t>(1) << r) - 1;
|
||||
}
|
||||
return CBV(coding::DenseCBV::BuildFromBitGroups(std::move(groups)));
|
||||
}
|
||||
|
||||
return CBV(m_p->LeaveFirstSetNBits(n));
|
||||
}
|
||||
|
||||
uint64_t CBV::Hash() const
|
||||
{
|
||||
if (IsEmpty())
|
||||
return 0;
|
||||
if (IsFull())
|
||||
return kModulo;
|
||||
return coding::CompressedBitVectorHasher::Hash(*m_p) % kModulo;
|
||||
}
|
||||
} // namespace search
|
||||
64
libs/search/cbv.hpp
Normal file
@@ -0,0 +1,64 @@
#pragma once
|
||||
|
||||
#include "coding/compressed_bit_vector.hpp"
|
||||
|
||||
#include "base/ref_counted.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
namespace search
|
||||
{
|
||||
// A wrapper around coding::CompressedBitVector that augments the
|
||||
// latter with the "full" state and uses reference counting for
|
||||
// ownership sharing.
|
||||
class CBV
|
||||
{
|
||||
public:
|
||||
static CBV const & GetFull();
|
||||
|
||||
CBV() = default;
|
||||
explicit CBV(std::unique_ptr<coding::CompressedBitVector> p);
|
||||
CBV(CBV const & cbv) = default;
|
||||
CBV(CBV && cbv);
|
||||
|
||||
inline operator bool() const { return !IsEmpty(); }
|
||||
CBV & operator=(std::unique_ptr<coding::CompressedBitVector> p);
|
||||
CBV & operator=(CBV const & rhs) = default;
|
||||
CBV & operator=(CBV && rhs);
|
||||
|
||||
void SetFull();
|
||||
void Reset();
|
||||
|
||||
inline bool IsEmpty() const { return !m_isFull && coding::CompressedBitVector::IsEmpty(m_p.Get()); }
|
||||
inline bool IsFull() const { return m_isFull; }
|
||||
|
||||
bool HasBit(uint64_t id) const;
|
||||
uint64_t PopCount() const;
|
||||
|
||||
template <typename Fn>
|
||||
void ForEach(Fn && fn) const
|
||||
{
|
||||
ASSERT(!m_isFull, ());
|
||||
if (!IsEmpty())
|
||||
coding::CompressedBitVectorEnumerator::ForEach(*m_p, std::forward<Fn>(fn));
|
||||
}
|
||||
|
||||
CBV Union(CBV const & rhs) const;
|
||||
CBV Intersect(CBV const & rhs) const;
|
||||
|
||||
// Takes first set |n| bits.
|
||||
CBV Take(uint64_t n) const;
|
||||
|
||||
uint64_t Hash() const;
|
||||
|
||||
private:
|
||||
explicit CBV(bool full);
|
||||
|
||||
base::RefCountPtr<coding::CompressedBitVector> m_p;
|
||||
|
||||
// True iff all bits are set to one.
|
||||
bool m_isFull = false;
|
||||
};
|
||||
} // namespace search
|
||||
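A small sketch of the intended semantics (the bit values are illustrative): the full state acts as the identity for Intersect and the absorbing element for Union, so callers can start from CBV::GetFull() and narrow it down without allocating a real bit vector.

// Sketch only; |features| is a CBV built elsewhere.
search::CBV cbv = search::CBV::GetFull();  // all bits set, no underlying vector allocated
cbv = cbv.Intersect(features);             // intersecting the full state simply yields |features|
if (cbv.HasBit(42))
{
  // Bit 42 is set in |features|.
}
if (!cbv.IsFull())
{
  uint64_t const count = cbv.PopCount();   // PopCount() asserts that the CBV is not in the full state
  // ... use |count| ...
}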
139
libs/search/cities_boundaries_table.cpp
Normal file
@@ -0,0 +1,139 @@
#include "search/cities_boundaries_table.hpp"
|
||||
|
||||
#include "search/categories_cache.hpp"
|
||||
#include "search/localities_source.hpp"
|
||||
#include "search/mwm_context.hpp"
|
||||
|
||||
#include "indexer/cities_boundaries_serdes.hpp"
|
||||
#include "indexer/mwm_set.hpp"
|
||||
#include "indexer/utils.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/cancellable.hpp"
|
||||
#include "base/checked_cast.hpp"
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
|
||||
namespace search
|
||||
{
|
||||
using namespace indexer;
|
||||
using namespace std;
|
||||
|
||||
// CitiesBoundariesTable::Boundaries ---------------------------------------------------------------
|
||||
bool CitiesBoundariesTable::Boundaries::HasPoint(m2::PointD const & p) const
|
||||
{
|
||||
return any_of(m_boundaries.begin(), m_boundaries.end(), [&](CityBoundary const & b) { return b.HasPoint(p, m_eps); });
|
||||
}
|
||||
|
||||
std::string DebugPrint(CitiesBoundariesTable::Boundaries const & boundaries)
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << "Boundaries [";
|
||||
os << ::DebugPrint(boundaries.m_boundaries) << ", ";
|
||||
os << "eps: " << boundaries.m_eps;
|
||||
os << "]";
|
||||
return os.str();
|
||||
}
|
||||
|
||||
// CitiesBoundariesTable ---------------------------------------------------------------------------
|
||||
bool CitiesBoundariesTable::Load()
|
||||
{
|
||||
auto handle = FindWorld(m_dataSource);
|
||||
if (!handle.IsAlive())
|
||||
{
|
||||
LOG(LWARNING, ("Can't find World map file."));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Skip if table was already loaded from this file.
|
||||
if (handle.GetId() == m_mwmId)
|
||||
return true;
|
||||
|
||||
MwmContext context(std::move(handle));
|
||||
base::Cancellable const cancellable;
|
||||
auto const localities = CategoriesCache(LocalitiesSource{}, cancellable).Get(context);
|
||||
|
||||
auto const & cont = context.m_value.m_cont;
|
||||
|
||||
if (!cont.IsExist(CITIES_BOUNDARIES_FILE_TAG))
|
||||
{
|
||||
LOG(LWARNING, ("No cities boundaries table in the world map."));
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<vector<CityBoundary>> all;
|
||||
double precision;
|
||||
|
||||
try
|
||||
{
|
||||
auto reader = cont.GetReader(CITIES_BOUNDARIES_FILE_TAG);
|
||||
ReaderSource<ReaderPtr<ModelReader>> source(reader);
|
||||
CitiesBoundariesSerDes::Deserialize(source, all, precision);
|
||||
}
|
||||
catch (Reader::Exception const & e)
|
||||
{
|
||||
LOG(LERROR, ("Can't read cities boundaries table from the world map:", e.Msg()));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (all.size() != localities.PopCount())
|
||||
{
|
||||
LOG(LERROR, ("Wrong number of boundaries, expected:", localities.PopCount(), "actual:", all.size()));
|
||||
return false;
|
||||
}
|
||||
|
||||
m_mwmId = context.GetId();
|
||||
m_table.clear();
|
||||
m_eps = precision;
|
||||
size_t idx = 0, notEmpty = 0;
|
||||
localities.ForEach([&](uint64_t fid)
|
||||
{
|
||||
if (!all[idx].empty())
|
||||
{
|
||||
CHECK(m_table.emplace(base::asserted_cast<uint32_t>(fid), std::move(all[idx])).second, ());
|
||||
++notEmpty;
|
||||
}
|
||||
++idx;
|
||||
});
|
||||
|
||||
LOG(LDEBUG, ("Localities count =", idx, "; with boundary =", notEmpty));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CitiesBoundariesTable::Get(FeatureID const & fid, Boundaries & bs) const
|
||||
{
|
||||
if (fid.m_mwmId != m_mwmId)
|
||||
return false;
|
||||
return Get(fid.m_index, bs);
|
||||
}
|
||||
|
||||
bool CitiesBoundariesTable::Get(uint32_t fid, Boundaries & bs) const
|
||||
{
|
||||
auto const it = m_table.find(fid);
|
||||
if (it == m_table.end())
|
||||
return false;
|
||||
bs = Boundaries(it->second, m_eps);
|
||||
return true;
|
||||
}
|
||||
|
||||
void GetCityBoundariesInRectForTesting(CitiesBoundariesTable const & table, m2::RectD const & rect,
|
||||
vector<uint32_t> & featureIds)
|
||||
{
|
||||
featureIds.clear();
|
||||
for (auto const & kv : table.m_table)
|
||||
{
|
||||
for (auto const & cb : kv.second)
|
||||
{
|
||||
if (rect.IsIntersect(cb.m_bbox.ToRect()))
|
||||
{
|
||||
featureIds.push_back(kv.first);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace search
|
||||
86
libs/search/cities_boundaries_table.hpp
Normal file
@@ -0,0 +1,86 @@
#pragma once
|
||||
|
||||
#include "indexer/city_boundary.hpp"
|
||||
#include "indexer/feature_decl.hpp"
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
#include "geometry/rect2d.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
class DataSource;
|
||||
|
||||
namespace search
|
||||
{
|
||||
class CitiesBoundariesTable
|
||||
{
|
||||
friend void GetCityBoundariesInRectForTesting(CitiesBoundariesTable const &, m2::RectD const & rect,
|
||||
std::vector<uint32_t> & featureIds);
|
||||
|
||||
public:
|
||||
class Boundaries
|
||||
{
|
||||
public:
|
||||
Boundaries() = default;
|
||||
|
||||
Boundaries(std::vector<indexer::CityBoundary> const & boundaries, double eps) : m_boundaries(boundaries), m_eps(eps)
|
||||
{}
|
||||
|
||||
// Returns true iff |p| is inside any of the regions bounded by
|
||||
// |*this|.
|
||||
bool HasPoint(m2::PointD const & p) const;
|
||||
|
||||
m2::RectD GetLimitRect() const
|
||||
{
|
||||
m2::RectD rect;
|
||||
for (auto const & boundary : m_boundaries)
|
||||
{
|
||||
rect.Add(boundary.m_bbox.Min());
|
||||
rect.Add(boundary.m_bbox.Max());
|
||||
}
|
||||
return rect;
|
||||
}
|
||||
|
||||
size_t GetCount() const { return m_boundaries.size(); }
|
||||
|
||||
template <class FnT>
|
||||
void ForEachBoundary(FnT && fn) const
|
||||
{
|
||||
for (size_t i = 0; i < m_boundaries.size(); ++i)
|
||||
fn(m_boundaries[i], i);
|
||||
}
|
||||
|
||||
friend std::string DebugPrint(Boundaries const & boundaries);
|
||||
|
||||
private:
|
||||
std::vector<indexer::CityBoundary> m_boundaries;
|
||||
double m_eps = 0.0;
|
||||
};
|
||||
|
||||
explicit CitiesBoundariesTable(DataSource const & dataSource) : m_dataSource(dataSource) {}
|
||||
|
||||
bool Load();
|
||||
|
||||
bool Has(FeatureID const & fid) const { return fid.m_mwmId == m_mwmId && Has(fid.m_index); }
|
||||
bool Has(uint32_t fid) const { return m_table.find(fid) != m_table.end(); }
|
||||
|
||||
bool Get(FeatureID const & fid, Boundaries & bs) const;
|
||||
bool Get(uint32_t fid, Boundaries & bs) const;
|
||||
|
||||
size_t GetSize() const { return m_table.size(); }
|
||||
|
||||
private:
|
||||
DataSource const & m_dataSource;
|
||||
MwmSet::MwmId m_mwmId;
|
||||
std::unordered_map<uint32_t, std::vector<indexer::CityBoundary>> m_table;
|
||||
double m_eps = 0.0;
|
||||
};
|
||||
|
||||
/// \brief Fills |featureIds| with feature ids of city boundaries if bounding rect of
|
||||
/// the city boundary crosses |rect|.
|
||||
/// \note This method is inefficient and is written for debug and test purposes only.
|
||||
void GetCityBoundariesInRectForTesting(CitiesBoundariesTable const &, m2::RectD const & rect,
|
||||
std::vector<uint32_t> & featureIds);
|
||||
} // namespace search
|
||||
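A usage sketch tying Load() and Get() together, assuming |dataSource| already has World.mwm registered and that |cityFeatureId| and |point| come from the caller:

// Sketch only.
search::CitiesBoundariesTable table(dataSource);
if (table.Load())
{
  search::CitiesBoundariesTable::Boundaries bs;
  if (table.Get(cityFeatureId, bs) && bs.HasPoint(point))
  {
    // |point| lies inside one of this city's boundaries.
  }
}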
39
libs/search/city_finder.cpp
Normal file
@@ -0,0 +1,39 @@
#include "search/city_finder.hpp"
|
||||
|
||||
#include "indexer/feature_decl.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace search
|
||||
{
|
||||
CityFinder::CityFinder(DataSource const & dataSource)
|
||||
: m_unusedBoundaries(dataSource)
|
||||
, m_unusedCache(m_cancellable)
|
||||
, m_finder(dataSource, m_unusedBoundaries, m_unusedCache)
|
||||
{}
|
||||
|
||||
string CityFinder::GetCityName(m2::PointD const & p, int8_t lang)
|
||||
{
|
||||
string_view city;
|
||||
m_finder.GetLocality(p, [&](LocalityItem const & item) { item.GetSpecifiedOrDefaultName(lang, city); });
|
||||
|
||||
// Return string, because m_finder.GetLocality() is not persistent.
|
||||
return std::string(city);
|
||||
}
|
||||
|
||||
string CityFinder::GetCityReadableName(m2::PointD const & p)
|
||||
{
|
||||
string_view city;
|
||||
m_finder.GetLocality(p, [&](LocalityItem const & item) { item.GetReadableName(city); });
|
||||
|
||||
// Return string, because m_finder.GetLocality() is not persistent.
|
||||
return std::string(city);
|
||||
}
|
||||
|
||||
FeatureID CityFinder::GetCityFeatureID(m2::PointD const & p)
|
||||
{
|
||||
FeatureID id;
|
||||
m_finder.GetLocality(p, [&id](LocalityItem const & item) { id = item.m_id; });
|
||||
return id;
|
||||
}
|
||||
} // namespace search
|
||||
36
libs/search/city_finder.hpp
Normal file
@@ -0,0 +1,36 @@
#pragma once
|
||||
|
||||
#include "search/categories_cache.hpp"
|
||||
#include "search/locality_finder.hpp"
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
|
||||
#include "base/cancellable.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
class DataSource;
|
||||
struct FeatureID;
|
||||
|
||||
namespace search
|
||||
{
|
||||
class CityFinder
|
||||
{
|
||||
public:
|
||||
// TODO (@milchakov): consider reusing the locality finder from the search
|
||||
// engine. Otherwise, CityFinder won't benefit from approximated
|
||||
// cities boundaries.
|
||||
explicit CityFinder(DataSource const & dataSource);
|
||||
|
||||
std::string GetCityName(m2::PointD const & p, int8_t lang);
|
||||
std::string GetCityReadableName(m2::PointD const & p);
|
||||
FeatureID GetCityFeatureID(m2::PointD const & p);
|
||||
|
||||
private:
|
||||
base::Cancellable m_cancellable;
|
||||
search::CitiesBoundariesTable m_unusedBoundaries;
|
||||
search::VillagesCache m_unusedCache;
|
||||
search::LocalityFinder m_finder;
|
||||
};
|
||||
} // namespace search
|
||||
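A usage sketch, assuming |dataSource| is an initialized DataSource and |pt| is a point in mercator projection coordinates:

// Sketch only.
search::CityFinder finder(dataSource);
std::string const city = finder.GetCityReadableName(pt);
FeatureID const id = finder.GetCityFeatureID(pt);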
31
libs/search/common.hpp
Normal file
@@ -0,0 +1,31 @@
#pragma once
|
||||
|
||||
#include "indexer/categories_holder.hpp"
|
||||
|
||||
#include "base/buffer_vector.hpp"
|
||||
#include "base/small_set.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
// The prefix is stored separately.
|
||||
// todo(@m, @y) Find a way (similar to TokenSlice maybe?) to unify
|
||||
// the prefix and non-prefix tokens.
|
||||
using QueryTokens = buffer_vector<strings::UniString, 32>;
|
||||
|
||||
using Locales = base::SafeSmallSet<CategoriesHolder::kLocaleMapping.size() + 1>;
|
||||
|
||||
/// Upper bound for max count of tokens for indexing and scoring.
|
||||
size_t constexpr kMaxNumTokens = 32;
|
||||
size_t constexpr kMaxNumSuggests = 5;
|
||||
|
||||
struct QueryString
|
||||
{
|
||||
std::string m_query; ///< raw UTF8 query string
|
||||
QueryTokens m_tokens; ///< query split into UniString tokens (not including the last prefix)
|
||||
strings::UniString m_prefix; ///< last prefix or empty (if query is ended with separator)
|
||||
|
||||
bool IsEmpty() const { return m_tokens.empty() && m_prefix.empty(); }
|
||||
};
|
||||
|
||||
} // namespace search
|
||||
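An illustration of how QueryString is filled; the query text is made up and MakeUniString is assumed to be the UTF-8 to UniString helper from base/string_utils.hpp:

// Sketch only.
search::QueryString q;
q.m_query = "cafe pushk";                              // raw UTF-8 query without a trailing separator
q.m_tokens.push_back(strings::MakeUniString("cafe"));  // complete token
q.m_prefix = strings::MakeUniString("pushk");          // last, still-being-typed prefix token
bool const empty = q.IsEmpty();                        // false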
96
libs/search/cuisine_filter.cpp
Normal file
@@ -0,0 +1,96 @@
#include "search/cuisine_filter.hpp"
|
||||
|
||||
#include "indexer/cuisines.hpp"
|
||||
#include "indexer/feature.hpp"
|
||||
#include "indexer/feature_meta.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
|
||||
#include "platform/mwm_traits.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/checked_cast.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace cuisine_filter
|
||||
{
|
||||
// Description -------------------------------------------------------------------------------------
|
||||
Description::Description(FeatureType & ft)
|
||||
{
|
||||
m_types.clear();
|
||||
ft.ForEachType([this](uint32_t t)
|
||||
{
|
||||
if (ftypes::IsCuisineChecker::Instance()(t))
|
||||
m_types.push_back(t);
|
||||
});
|
||||
}
|
||||
|
||||
CuisineFilter::ScopedFilter::ScopedFilter(MwmSet::MwmId const & mwmId, Descriptions const & descriptions,
|
||||
vector<uint32_t> const & types)
|
||||
: m_mwmId(mwmId)
|
||||
, m_descriptions(descriptions)
|
||||
, m_types(types)
|
||||
{
|
||||
sort(m_types.begin(), m_types.end());
|
||||
}
|
||||
|
||||
bool CuisineFilter::ScopedFilter::Matches(FeatureID const & fid) const
|
||||
{
|
||||
if (fid.m_mwmId != m_mwmId)
|
||||
return false;
|
||||
|
||||
auto it = lower_bound(m_descriptions.begin(), m_descriptions.end(), make_pair(fid.m_index, Description{}),
|
||||
[](pair<uint32_t, Description> const & lhs, pair<uint32_t, Description> const & rhs)
|
||||
{ return lhs.first < rhs.first; });
|
||||
if (it == m_descriptions.end() || it->first != fid.m_index)
|
||||
return false;
|
||||
|
||||
for (auto const t : it->second.m_types)
|
||||
if (binary_search(m_types.begin(), m_types.end(), t))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// CuisineFilter ------------------------------------------------------------------------------------
|
||||
CuisineFilter::CuisineFilter(FoodCache & food) : m_food(food) {}
|
||||
|
||||
unique_ptr<CuisineFilter::ScopedFilter> CuisineFilter::MakeScopedFilter(MwmContext const & context,
|
||||
vector<uint32_t> const & types)
|
||||
{
|
||||
if (types.empty())
|
||||
return {};
|
||||
return make_unique<ScopedFilter>(context.GetId(), GetDescriptions(context), types);
|
||||
}
|
||||
|
||||
void CuisineFilter::ClearCaches()
|
||||
{
|
||||
m_descriptions.clear();
|
||||
}
|
||||
|
||||
CuisineFilter::Descriptions const & CuisineFilter::GetDescriptions(MwmContext const & context)
|
||||
{
|
||||
auto const & mwmId = context.GetId();
|
||||
auto const it = m_descriptions.find(mwmId);
|
||||
if (it != m_descriptions.end())
|
||||
return it->second;
|
||||
|
||||
auto & value = context.m_value;
|
||||
version::MwmTraits mwmTraits(value.GetMwmVersion());
|
||||
|
||||
auto const food = m_food.Get(context);
|
||||
auto & descriptions = m_descriptions[mwmId];
|
||||
food.ForEach([&descriptions, &context](uint64_t bit)
|
||||
{
|
||||
auto const id = base::asserted_cast<uint32_t>(bit);
|
||||
auto ft = context.GetFeature(id);
|
||||
if (ft)
|
||||
descriptions.emplace_back(id, Description(*ft));
|
||||
});
|
||||
return descriptions;
|
||||
}
|
||||
} // namespace cuisine_filter
|
||||
} // namespace search
|
||||
58
libs/search/cuisine_filter.hpp
Normal file
@@ -0,0 +1,58 @@
#pragma once
|
||||
|
||||
#include "search/categories_cache.hpp"
|
||||
#include "search/mwm_context.hpp"
|
||||
|
||||
#include "indexer/mwm_set.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
class FeatureType;
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace cuisine_filter
|
||||
{
|
||||
struct Description
|
||||
{
|
||||
Description() = default;
|
||||
Description(FeatureType & ft);
|
||||
|
||||
std::vector<uint32_t> m_types;
|
||||
};
|
||||
|
||||
class CuisineFilter
|
||||
{
|
||||
public:
|
||||
using Descriptions = std::vector<std::pair<uint32_t, Description>>;
|
||||
|
||||
class ScopedFilter
|
||||
{
|
||||
public:
|
||||
ScopedFilter(MwmSet::MwmId const & mwmId, Descriptions const & descriptions, std::vector<uint32_t> const & types);
|
||||
|
||||
bool Matches(FeatureID const & fid) const;
|
||||
|
||||
private:
|
||||
MwmSet::MwmId const m_mwmId;
|
||||
Descriptions const & m_descriptions;
|
||||
std::vector<uint32_t> m_types;
|
||||
};
|
||||
|
||||
CuisineFilter(FoodCache & food);
|
||||
|
||||
std::unique_ptr<ScopedFilter> MakeScopedFilter(MwmContext const & context, std::vector<uint32_t> const & types);
|
||||
|
||||
void ClearCaches();
|
||||
|
||||
private:
|
||||
Descriptions const & GetDescriptions(MwmContext const & context);
|
||||
|
||||
FoodCache & m_food;
|
||||
std::map<MwmSet::MwmId, Descriptions> m_descriptions;
|
||||
};
|
||||
} // namespace cuisine_filter
|
||||
} // namespace search
|
||||
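A usage sketch, assuming |cancellable|, an MwmContext |context|, the requested |cuisineTypes| and a |featureId| are provided by the caller:

// Sketch only.
search::FoodCache food(cancellable);
search::cuisine_filter::CuisineFilter filter(food);
auto const scoped = filter.MakeScopedFilter(context, cuisineTypes);
if (scoped && scoped->Matches(featureId))
{
  // The feature has at least one of the requested cuisine types.
}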
28
libs/search/displayed_categories.cpp
Normal file
@@ -0,0 +1,28 @@
#include "search/displayed_categories.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace search
|
||||
{
|
||||
DisplayedCategories::DisplayedCategories(CategoriesHolder const & holder) : m_holder(holder)
|
||||
{
|
||||
m_keys = {"category_eat", "category_hotel", "category_food", "category_tourism",
|
||||
"category_wifi", "category_transport", "category_fuel", "category_parking",
|
||||
"category_shopping", "category_secondhand", "category_atm", "category_nightlife",
|
||||
"category_children", "category_bank", "category_entertainment", "category_water",
|
||||
"category_hospital", "category_pharmacy", "category_recycling", "category_rv",
|
||||
"category_police", "category_toilet", "category_post"};
|
||||
}
|
||||
|
||||
void DisplayedCategories::Modify(CategoriesModifier & modifier)
|
||||
{
|
||||
modifier.Modify(m_keys);
|
||||
}
|
||||
|
||||
std::vector<std::string> const & DisplayedCategories::GetKeys() const
|
||||
{
|
||||
return m_keys;
|
||||
}
|
||||
} // namespace search
|
||||
57
libs/search/displayed_categories.hpp
Normal file
@@ -0,0 +1,57 @@
#pragma once
|
||||
|
||||
#include "indexer/categories_holder.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace search
|
||||
{
|
||||
class CategoriesModifier;
|
||||
// *NOTE* This class is not thread-safe.
|
||||
class DisplayedCategories
|
||||
{
|
||||
public:
|
||||
using Keys = std::vector<std::string>;
|
||||
|
||||
DisplayedCategories(CategoriesHolder const & holder);
|
||||
|
||||
void Modify(CategoriesModifier & modifier);
|
||||
|
||||
// Returns a list of English names of displayed categories for the categories search tab.
|
||||
// The list may be modified during the application runtime in order to support sponsored or
|
||||
// featured categories. Keys may be used as parts of resources ids.
|
||||
Keys const & GetKeys() const;
|
||||
|
||||
// Calls |fn| on each pair (synonym name, synonym locale) for the
|
||||
// |key|.
|
||||
template <typename Fn>
|
||||
void ForEachSynonym(std::string const & key, Fn && fn) const
|
||||
{
|
||||
auto const & translations = m_holder.GetGroupTranslations();
|
||||
auto const it = translations.find("@" + key);
|
||||
if (it == translations.end())
|
||||
return;
|
||||
|
||||
for (auto const & name : it->second)
|
||||
fn(name.m_name, CategoriesHolder::MapIntegerToLocale(name.m_locale));
|
||||
}
|
||||
|
||||
static bool IsLanguageSupported(std::string_view locale)
|
||||
{
|
||||
return CategoriesHolder::MapLocaleToInteger(locale) != CategoriesHolder::kUnsupportedLocaleCode;
|
||||
}
|
||||
|
||||
private:
|
||||
CategoriesHolder const & m_holder;
|
||||
Keys m_keys;
|
||||
};
|
||||
|
||||
class CategoriesModifier
|
||||
{
|
||||
public:
|
||||
virtual ~CategoriesModifier() = default;
|
||||
|
||||
virtual void Modify(DisplayedCategories::Keys & keys) = 0;
|
||||
};
|
||||
} // namespace search
|
||||
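A usage sketch for the synonym lookup, assuming |holder| is a loaded CategoriesHolder; the generic lambda avoids committing to the exact locale type returned by MapIntegerToLocale:

// Sketch only.
search::DisplayedCategories categories(holder);
for (auto const & key : categories.GetKeys())
{
  categories.ForEachSynonym(key, [&key](auto const & name, auto const & locale)
  {
    // E.g. use |name| as the caption for |key| when the UI locale matches |locale|.
  });
}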
233
libs/search/doc_vec.cpp
Normal file
@@ -0,0 +1,233 @@
#include "search/doc_vec.hpp"
|
||||
|
||||
#include <limits>
|
||||
|
||||
namespace search
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Accumulates frequencies of equal tokens in |tfs|. Result is sorted
|
||||
// by tokens.
|
||||
void SortAndMerge(vector<strings::UniString> tokens, vector<TokenFrequencyPair> & tfs)
|
||||
{
|
||||
ASSERT(tfs.empty(), ());
|
||||
sort(tokens.begin(), tokens.end());
|
||||
for (size_t i = 0; i < tokens.size(); ++i)
|
||||
if (tfs.empty() || tfs.back().m_token != tokens[i])
|
||||
tfs.emplace_back(tokens[i], 1 /* frequency */);
|
||||
else
|
||||
++tfs.back().m_frequency;
|
||||
}
|
||||
|
||||
double GetTfIdf(double tf, double idf)
|
||||
{
|
||||
return tf * idf;
|
||||
}
|
||||
|
||||
double GetWeightImpl(IdfMap & idfs, TokenFrequencyPair const & tf, bool isPrefix)
|
||||
{
|
||||
return GetTfIdf(tf.m_frequency, idfs.Get(tf.m_token, isPrefix));
|
||||
}
|
||||
|
||||
double GetSqrWeightImpl(IdfMap & idfs, TokenFrequencyPair const & tf, bool isPrefix)
|
||||
{
|
||||
auto const w = GetWeightImpl(idfs, tf, isPrefix);
|
||||
return w * w;
|
||||
}
|
||||
|
||||
// Computes squared L2 norm of vector of tokens.
|
||||
double SqrL2(IdfMap & idfs, vector<TokenFrequencyPair> const & tfs)
|
||||
{
|
||||
double sum = 0;
|
||||
for (auto const & tf : tfs)
|
||||
sum += GetSqrWeightImpl(idfs, tf, false /* isPrefix */);
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Computes squared L2 norm of vector of tokens + prefix token.
|
||||
double SqrL2(IdfMap & idfs, vector<TokenFrequencyPair> const & tfs, optional<strings::UniString> const & prefix)
|
||||
{
|
||||
auto result = SqrL2(idfs, tfs);
|
||||
if (prefix)
|
||||
result += GetSqrWeightImpl(idfs, TokenFrequencyPair(*prefix, 1 /* frequency */), true /* isPrefix */);
|
||||
return result;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// TokenFrequencyPair ------------------------------------------------------------------------------
|
||||
bool TokenFrequencyPair::operator<(TokenFrequencyPair const & rhs) const
|
||||
{
|
||||
if (m_token != rhs.m_token)
|
||||
return m_token < rhs.m_token;
|
||||
return m_frequency < rhs.m_frequency;
|
||||
}
|
||||
|
||||
void TokenFrequencyPair::Swap(TokenFrequencyPair & rhs)
|
||||
{
|
||||
m_token.swap(rhs.m_token);
|
||||
swap(m_frequency, rhs.m_frequency);
|
||||
}
|
||||
|
||||
string DebugPrint(TokenFrequencyPair const & tf)
|
||||
{
|
||||
ostringstream os;
|
||||
os << "TokenFrequencyPair [" << DebugPrint(tf.m_token) << ", " << tf.m_frequency << "]";
|
||||
return os.str();
|
||||
}
|
||||
|
||||
// DocVec ------------------------------------------------------------------------------------------
|
||||
DocVec::DocVec(Builder const & builder)
|
||||
{
|
||||
SortAndMerge(builder.m_tokens, m_tfs);
|
||||
}
|
||||
|
||||
double DocVec::Norm(IdfMap & idfs) const
|
||||
{
|
||||
return SqrL2(idfs, m_tfs);
|
||||
}
|
||||
|
||||
strings::UniString const & DocVec::GetToken(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, m_tfs.size(), ());
|
||||
return m_tfs[i].m_token;
|
||||
}
|
||||
|
||||
double DocVec::GetIdf(IdfMap & idfs, size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, m_tfs.size(), ());
|
||||
return idfs.Get(m_tfs[i].m_token, false /* isPrefix */);
|
||||
}
|
||||
|
||||
double DocVec::GetWeight(IdfMap & idfs, size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, m_tfs.size(), ());
|
||||
return GetWeightImpl(idfs, m_tfs[i], false /* isPrefix */);
|
||||
}
|
||||
|
||||
// QueryVec ----------------------------------------------------------------------------------------
|
||||
QueryVec::QueryVec(IdfMap & idfs, Builder const & builder) : m_idfs(&idfs), m_prefix(builder.m_prefix)
|
||||
{
|
||||
SortAndMerge(builder.m_tokens, m_tfs);
|
||||
}
|
||||
|
||||
double QueryVec::Similarity(IdfMap & docIdfs, DocVec const & rhs)
|
||||
{
|
||||
size_t kInvalidIndex = numeric_limits<size_t>::max();
|
||||
|
||||
if (Empty() && rhs.Empty())
|
||||
return 1.0;
|
||||
|
||||
if (Empty() || rhs.Empty())
|
||||
return 0.0;
|
||||
|
||||
vector<size_t> rsMatchTo(rhs.GetNumTokens(), kInvalidIndex);
|
||||
|
||||
double dot = 0;
|
||||
{
|
||||
size_t i = 0, j = 0;
|
||||
|
||||
while (i < m_tfs.size() && j < rhs.GetNumTokens())
|
||||
{
|
||||
auto const & lt = m_tfs[i].m_token;
|
||||
auto const & rt = rhs.GetToken(j);
|
||||
|
||||
if (lt < rt)
|
||||
{
|
||||
++i;
|
||||
}
|
||||
else if (lt > rt)
|
||||
{
|
||||
++j;
|
||||
}
|
||||
else
|
||||
{
|
||||
dot += GetFullTokenWeight(i) * rhs.GetWeight(docIdfs, j);
|
||||
rsMatchTo[j] = i;
|
||||
++i;
|
||||
++j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto const ln = Norm();
|
||||
auto const rn = rhs.Norm(docIdfs);
|
||||
|
||||
// This similarity metric assumes that prefix is not matched in the document.
|
||||
double const similarityNoPrefix = ln > 0 && rn > 0 ? dot / sqrt(ln) / sqrt(rn) : 0;
|
||||
|
||||
if (!m_prefix)
|
||||
return similarityNoPrefix;
|
||||
|
||||
double similarityWithPrefix = 0;
|
||||
auto const & prefix = *m_prefix;
|
||||
|
||||
// Let's try to match prefix token with all tokens in the
|
||||
// document, and compute the best cosine distance.
|
||||
for (size_t j = 0; j < rhs.GetNumTokens(); ++j)
|
||||
{
|
||||
auto const & t = rhs.GetToken(j);
|
||||
if (!strings::StartsWith(t.begin(), t.end(), prefix.begin(), prefix.end()))
|
||||
continue;
|
||||
|
||||
auto const i = rsMatchTo[j];
|
||||
|
||||
double num = 0;
|
||||
double denom = 0;
|
||||
if (i == kInvalidIndex)
|
||||
{
|
||||
// If this document token is not matched with full tokens in a
|
||||
// query, we need to update its weight in the cosine distance
|
||||
// - so we need to update correspondingly dot product and
|
||||
// vector norms of query and doc.
|
||||
auto const oldW = GetPrefixTokenWeight();
|
||||
auto const newW = GetTfIdf(1 /* frequency */, rhs.GetIdf(docIdfs, j));
|
||||
auto const l = max(0.0, ln - oldW * oldW + newW * newW);
|
||||
|
||||
num = dot + newW * rhs.GetWeight(docIdfs, j);
|
||||
denom = sqrt(l) * sqrt(rn);
|
||||
}
|
||||
else
|
||||
{
|
||||
// If this document token is already matched with |i|-th full
|
||||
// token in a query - we know that completion of the prefix
|
||||
// token is the |i|-th query token. So we need to update
|
||||
// correspondingly dot product and vector norm of the query.
|
||||
auto const oldFW = GetFullTokenWeight(i);
|
||||
auto const oldPW = GetPrefixTokenWeight();
|
||||
|
||||
auto const tf = m_tfs[i].m_frequency + 1;
|
||||
auto const idf = m_idfs->Get(m_tfs[i].m_token, false /* isPrefix */);
|
||||
auto const newW = GetTfIdf(tf, idf);
|
||||
|
||||
auto const l = ln - oldFW * oldFW - oldPW * oldPW + newW * newW;
|
||||
|
||||
num = dot + (newW - oldFW) * rhs.GetWeight(docIdfs, j);
|
||||
denom = sqrt(l) * sqrt(rn);
|
||||
}
|
||||
|
||||
if (denom > 0)
|
||||
similarityWithPrefix = max(similarityWithPrefix, num / denom);
|
||||
}
|
||||
|
||||
return max(similarityWithPrefix, similarityNoPrefix);
|
||||
}
|
||||
|
||||
double QueryVec::Norm()
|
||||
{
|
||||
return SqrL2(*m_idfs, m_tfs, m_prefix);
|
||||
}
|
||||
|
||||
double QueryVec::GetFullTokenWeight(size_t i)
|
||||
{
|
||||
ASSERT_LESS(i, m_tfs.size(), ());
|
||||
return GetWeightImpl(*m_idfs, m_tfs[i], false /* isPrefix */);
|
||||
}
|
||||
|
||||
double QueryVec::GetPrefixTokenWeight()
|
||||
{
|
||||
ASSERT(m_prefix, ());
|
||||
return GetWeightImpl(*m_idfs, TokenFrequencyPair(*m_prefix, 1 /* frequency */), true /* isPrefix */);
|
||||
}
|
||||
} // namespace search
|
||||
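Restating the ranking math above in one formula: every token t gets weight w(t) = tf(t) * idf(t) (GetTfIdf), SqrL2 accumulates the squared weights, and Similarity computes the cosine

  sim(q, d) = dot(q, d) / (sqrt(SqrL2(q)) * sqrt(SqrL2(d))),  where dot(q, d) = sum over shared tokens t of w_q(t) * w_d(t),

returning 0 when either norm is zero. The prefix branch then re-evaluates this value for every document token that starts with the query prefix, using the adjusted weights and norms, and keeps the maximum of those candidates and the prefix-free score.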
134
libs/search/doc_vec.hpp
Normal file
@@ -0,0 +1,134 @@
#pragma once
|
||||
|
||||
#include "search/idf_map.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace search
|
||||
{
|
||||
class IdfMap;
|
||||
|
||||
struct TokenFrequencyPair
|
||||
{
|
||||
TokenFrequencyPair() = default;
|
||||
|
||||
template <typename Token>
|
||||
TokenFrequencyPair(Token && token, uint64_t frequency) : m_token(std::forward<Token>(token))
|
||||
, m_frequency(frequency)
|
||||
{}
|
||||
|
||||
bool operator<(TokenFrequencyPair const & rhs) const;
|
||||
|
||||
void Swap(TokenFrequencyPair & rhs);
|
||||
|
||||
strings::UniString m_token;
|
||||
uint64_t m_frequency = 0;
|
||||
};
|
||||
|
||||
std::string DebugPrint(TokenFrequencyPair const & tf);
|
||||
|
||||
// This class represents a document in a vector space of tokens.
|
||||
class DocVec
|
||||
{
|
||||
public:
|
||||
class Builder
|
||||
{
|
||||
public:
|
||||
template <typename Token>
|
||||
void Add(Token && token)
|
||||
{
|
||||
m_tokens.emplace_back(std::forward<Token>(token));
|
||||
}
|
||||
|
||||
private:
|
||||
friend class DocVec;
|
||||
|
||||
std::vector<strings::UniString> m_tokens;
|
||||
};
|
||||
|
||||
DocVec() = default;
|
||||
explicit DocVec(Builder const & builder);
|
||||
|
||||
// Computes vector norm of the doc.
|
||||
double Norm(IdfMap & idfs) const;
|
||||
|
||||
size_t GetNumTokens() const { return m_tfs.size(); }
|
||||
|
||||
strings::UniString const & GetToken(size_t i) const;
|
||||
double GetIdf(IdfMap & idfs, size_t i) const;
|
||||
double GetWeight(IdfMap & idfs, size_t i) const;
|
||||
|
||||
bool Empty() const { return m_tfs.empty(); }
|
||||
|
||||
private:
|
||||
friend std::string DebugPrint(DocVec const & dv) { return "DocVec " + ::DebugPrint(dv.m_tfs); }
|
||||
|
||||
std::vector<TokenFrequencyPair> m_tfs;
|
||||
};
|
||||
|
||||
// This class represents a search query in a vector space of tokens.
|
||||
class QueryVec
|
||||
{
|
||||
public:
|
||||
class Builder
|
||||
{
|
||||
public:
|
||||
template <typename Token>
|
||||
void AddFull(Token && token)
|
||||
{
|
||||
m_tokens.emplace_back(std::forward<Token>(token));
|
||||
}
|
||||
|
||||
template <typename Token>
|
||||
void SetPrefix(Token && token)
|
||||
{
|
||||
m_prefix = std::forward<Token>(token);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class QueryVec;
|
||||
|
||||
std::vector<strings::UniString> m_tokens;
|
||||
std::optional<strings::UniString> m_prefix;
|
||||
};
|
||||
|
||||
explicit QueryVec(IdfMap & idfs) : m_idfs(&idfs) {}
|
||||
|
||||
QueryVec(IdfMap & idfs, Builder const & builder);
|
||||
|
||||
// Computes cosine similarity between |*this| and |rhs|.
|
||||
double Similarity(IdfMap & docIdfs, DocVec const & rhs);
|
||||
|
||||
// Computes vector norm of the query.
|
||||
double Norm();
|
||||
|
||||
bool Empty() const { return m_tfs.empty() && !m_prefix; }
|
||||
|
||||
private:
|
||||
double GetFullTokenWeight(size_t i);
|
||||
double GetPrefixTokenWeight();
|
||||
|
||||
friend std::string DebugPrint(QueryVec const & qv)
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << "QueryVec " + ::DebugPrint(qv.m_tfs);
|
||||
if (qv.m_prefix)
|
||||
os << " " << DebugPrint(*qv.m_prefix);
|
||||
return os.str();
|
||||
}
|
||||
|
||||
IdfMap * m_idfs;
|
||||
std::vector<TokenFrequencyPair> m_tfs;
|
||||
std::optional<strings::UniString> m_prefix;
|
||||
};
|
||||
} // namespace search
|
||||
118
libs/search/downloader_search_callback.cpp
Normal file
@@ -0,0 +1,118 @@
#include "search/downloader_search_callback.hpp"
|
||||
|
||||
#include "search/result.hpp"
|
||||
|
||||
#include "editor/editable_data_source.hpp"
|
||||
|
||||
#include "indexer/data_source.hpp"
|
||||
|
||||
#include "storage/country_info_getter.hpp"
|
||||
#include "storage/storage.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace
|
||||
{
|
||||
/// @todo Can't switch to string_view yet because of the unordered_map<string> synonyms.
|
||||
bool GetGroupCountryIdFromFeature(storage::Storage const & storage, FeatureType & ft, std::string & name)
|
||||
{
|
||||
auto const & synonyms = storage.GetCountryNameSynonyms();
|
||||
int8_t const langIndices[] = {StringUtf8Multilang::kEnglishCode, StringUtf8Multilang::kDefaultCode,
|
||||
StringUtf8Multilang::kInternationalCode};
|
||||
|
||||
for (auto const langIndex : langIndices)
|
||||
{
|
||||
name = ft.GetName(langIndex);
|
||||
if (name.empty())
|
||||
continue;
|
||||
|
||||
if (storage.IsInnerNode(name))
|
||||
return true;
|
||||
auto const it = synonyms.find(name);
|
||||
if (it == synonyms.end())
|
||||
continue;
|
||||
if (!storage.IsInnerNode(it->second))
|
||||
continue;
|
||||
name = it->second;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace search
|
||||
{
|
||||
DownloaderSearchCallback::DownloaderSearchCallback(Delegate & delegate, DataSource const & dataSource,
|
||||
storage::CountryInfoGetter const & infoGetter,
|
||||
storage::Storage const & storage,
|
||||
storage::DownloaderSearchParams params)
|
||||
: m_delegate(delegate)
|
||||
, m_dataSource(dataSource)
|
||||
, m_infoGetter(infoGetter)
|
||||
, m_storage(storage)
|
||||
, m_params(std::move(params))
|
||||
{}
|
||||
|
||||
void DownloaderSearchCallback::operator()(search::Results const & results)
|
||||
{
|
||||
storage::DownloaderSearchResults downloaderSearchResults;
|
||||
std::set<storage::DownloaderSearchResult> uniqueResults;
|
||||
|
||||
for (auto const & result : results)
|
||||
{
|
||||
if (!result.HasPoint())
|
||||
continue;
|
||||
|
||||
if (result.GetResultType() != search::Result::Type::LatLon)
|
||||
{
|
||||
FeatureID const & fid = result.GetFeatureID();
|
||||
FeaturesLoaderGuard loader(m_dataSource, fid.m_mwmId);
|
||||
auto ft = loader.GetFeatureByIndex(fid.m_index);
|
||||
if (!ft)
|
||||
{
|
||||
LOG(LERROR, ("Feature can't be loaded:", fid));
|
||||
continue;
|
||||
}
|
||||
|
||||
ftypes::LocalityType const type = ftypes::IsLocalityChecker::Instance().GetType(*ft);
|
||||
|
||||
if (type == ftypes::LocalityType::Country || type == ftypes::LocalityType::State)
|
||||
{
|
||||
std::string groupFeatureName;
|
||||
if (GetGroupCountryIdFromFeature(m_storage, *ft, groupFeatureName))
|
||||
{
|
||||
storage::DownloaderSearchResult downloaderResult(groupFeatureName, result.GetString() /* m_matchedName */);
|
||||
if (uniqueResults.find(downloaderResult) == uniqueResults.end())
|
||||
{
|
||||
uniqueResults.insert(downloaderResult);
|
||||
downloaderSearchResults.m_results.push_back(downloaderResult);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
auto const & mercator = result.GetFeatureCenter();
|
||||
storage::CountryId const & countryId = m_infoGetter.GetRegionCountryId(mercator);
|
||||
if (countryId == storage::kInvalidCountryId)
|
||||
continue;
|
||||
|
||||
storage::DownloaderSearchResult downloaderResult(countryId, result.GetString() /* m_matchedName */);
|
||||
if (uniqueResults.find(downloaderResult) == uniqueResults.end())
|
||||
{
|
||||
uniqueResults.insert(downloaderResult);
|
||||
downloaderSearchResults.m_results.push_back(downloaderResult);
|
||||
}
|
||||
}
|
||||
|
||||
downloaderSearchResults.m_query = m_params.m_query;
|
||||
downloaderSearchResults.m_endMarker = results.IsEndMarker();
|
||||
|
||||
m_delegate.RunUITask([onResults = m_params.m_onResults, results = std::move(downloaderSearchResults)]() mutable
|
||||
{ onResults(std::move(results)); });
|
||||
}
|
||||
} // namespace search
|
||||
46
libs/search/downloader_search_callback.hpp
Normal file
@@ -0,0 +1,46 @@
#pragma once
|
||||
|
||||
#include "storage/downloader_search_params.hpp"
|
||||
|
||||
#include <functional>
|
||||
|
||||
class DataSource;
|
||||
|
||||
namespace storage
|
||||
{
|
||||
class CountryInfoGetter;
|
||||
class Storage;
|
||||
} // namespace storage
|
||||
|
||||
namespace search
|
||||
{
|
||||
class Results;
|
||||
|
||||
// An on-results callback that should be used for the search in downloader.
|
||||
//
|
||||
// *NOTE* the class is NOT thread safe.
|
||||
class DownloaderSearchCallback
|
||||
{
|
||||
public:
|
||||
class Delegate
|
||||
{
|
||||
public:
|
||||
virtual ~Delegate() = default;
|
||||
|
||||
virtual void RunUITask(std::function<void()> fn) = 0;
|
||||
};
|
||||
|
||||
DownloaderSearchCallback(Delegate & delegate, DataSource const & dataSource,
|
||||
storage::CountryInfoGetter const & infoGetter, storage::Storage const & storage,
|
||||
storage::DownloaderSearchParams params);
|
||||
|
||||
void operator()(search::Results const & results);
|
||||
|
||||
private:
|
||||
Delegate & m_delegate;
|
||||
DataSource const & m_dataSource;
|
||||
storage::CountryInfoGetter const & m_infoGetter;
|
||||
storage::Storage const & m_storage;
|
||||
storage::DownloaderSearchParams m_params;
|
||||
};
|
||||
} // namespace search
|
||||
28
libs/search/dummy_rank_table.cpp
Normal file
@@ -0,0 +1,28 @@
#include "search/dummy_rank_table.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
uint8_t DummyRankTable::Get(uint64_t /* i */) const
|
||||
{
|
||||
return kNoRank;
|
||||
}
|
||||
|
||||
uint64_t DummyRankTable::Size() const
|
||||
{
|
||||
NOTIMPLEMENTED();
|
||||
return 0;
|
||||
}
|
||||
|
||||
RankTable::Version DummyRankTable::GetVersion() const
|
||||
{
|
||||
NOTIMPLEMENTED();
|
||||
return RankTable::VERSION_COUNT;
|
||||
}
|
||||
|
||||
void DummyRankTable::Serialize(Writer &)
|
||||
{
|
||||
NOTIMPLEMENTED();
|
||||
}
|
||||
} // namespace search
|
||||
21
libs/search/dummy_rank_table.hpp
Normal file
@@ -0,0 +1,21 @@
#pragma once
|
||||
|
||||
#include "indexer/rank_table.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace search
|
||||
{
|
||||
// This dummy rank table is used instead of a normal rank table when
|
||||
// the latter can't be loaded. It should not be serialized and can't
|
||||
// be loaded.
|
||||
class DummyRankTable : public RankTable
|
||||
{
|
||||
public:
|
||||
// RankTable overrides:
|
||||
uint8_t Get(uint64_t i) const override;
|
||||
uint64_t Size() const override;
|
||||
Version GetVersion() const override;
|
||||
void Serialize(Writer &) override;
|
||||
};
|
||||
} // namespace search
|
||||
44
libs/search/editor_delegate.cpp
Normal file
@@ -0,0 +1,44 @@
#include "search/editor_delegate.hpp"
|
||||
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
|
||||
#include "editor/editable_data_source.hpp"
|
||||
|
||||
#include "indexer/data_source_helpers.hpp"
|
||||
#include "indexer/feature_decl.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace search
|
||||
{
|
||||
EditorDelegate::EditorDelegate(DataSource const & dataSource) : m_dataSource(dataSource) {}
|
||||
|
||||
MwmSet::MwmId EditorDelegate::GetMwmIdByMapName(string const & name) const
|
||||
{
|
||||
return m_dataSource.GetMwmIdByCountryFile(platform::CountryFile(name));
|
||||
}
|
||||
|
||||
unique_ptr<osm::EditableMapObject> EditorDelegate::GetOriginalMapObject(FeatureID const & fid) const
|
||||
{
|
||||
FeaturesLoaderGuard guard(m_dataSource, fid.m_mwmId);
|
||||
auto feature = guard.GetOriginalFeatureByIndex(fid.m_index);
|
||||
if (!feature)
|
||||
return {};
|
||||
|
||||
auto object = make_unique<osm::EditableMapObject>();
|
||||
object->SetFromFeatureType(*feature);
|
||||
return object;
|
||||
}
|
||||
|
||||
string EditorDelegate::GetOriginalFeatureStreet(FeatureID const & fid) const
|
||||
{
|
||||
search::ReverseGeocoder const coder(m_dataSource);
|
||||
return coder.GetOriginalFeatureStreetName(fid);
|
||||
}
|
||||
|
||||
void EditorDelegate::ForEachFeatureAtPoint(osm::Editor::FeatureTypeFn && fn, m2::PointD const & point) const
|
||||
{
|
||||
auto constexpr kToleranceMeters = 1e-2;
|
||||
indexer::ForEachFeatureAtPoint(m_dataSource, std::move(fn), point, kToleranceMeters);
|
||||
}
|
||||
} // namespace search
|
||||
28
libs/search/editor_delegate.hpp
Normal file
@@ -0,0 +1,28 @@
#pragma once

#include "editor/osm_editor.hpp"

#include "indexer/editable_map_object.hpp"

#include <memory>
#include <string>

class DataSource;

namespace search
{
class EditorDelegate : public osm::Editor::Delegate
{
public:
  EditorDelegate(DataSource const & dataSource);

  // osm::Editor::Delegate overrides:
  MwmSet::MwmId GetMwmIdByMapName(std::string const & name) const override;
  std::unique_ptr<osm::EditableMapObject> GetOriginalMapObject(FeatureID const & fid) const override;
  std::string GetOriginalFeatureStreet(FeatureID const & fid) const override;
  void ForEachFeatureAtPoint(osm::Editor::FeatureTypeFn && fn, m2::PointD const & point) const override;

private:
  DataSource const & m_dataSource;
};
}  // namespace search
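EditorDelegate is the bridge that gives osm::Editor read access to map data without a direct DataSource dependency. A hedged wiring sketch is shown below; the SetDelegate call is an assumption about osm::Editor's API and the function name is illustrative, the point is only that the editor receives a DataSource-backed delegate at startup.

#include "search/editor_delegate.hpp"

#include "editor/osm_editor.hpp"

#include <memory>

// Wiring sketch (SetDelegate is assumed to exist on osm::Editor).
void AttachEditorDelegate(DataSource const & dataSource)
{
  osm::Editor::Instance().SetDelegate(std::make_unique<search::EditorDelegate>(dataSource));
}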
61
libs/search/emitter.hpp
Normal file
@@ -0,0 +1,61 @@
#pragma once

#include "search/result.hpp"
#include "search/search_params.hpp"

#include "base/logging.hpp"
#include "base/timer.hpp"

#include <string>
#include <vector>

namespace search
{
namespace bookmarks
{
struct Result;
}

class Emitter
{
public:
  void Init(SearchParams::OnResults onResults)
  {
    m_onResults = std::move(onResults);
    m_results.Clear();
    m_prevEmitSize = 0;
    m_timer.Reset();
  }

  bool AddResult(Result && res) { return m_results.AddResult(std::move(res)); }
  void AddResultNoChecks(Result && res) { m_results.AddResultNoChecks(std::move(res)); }
  void AddBookmarkResult(bookmarks::Result const & result) { m_results.AddBookmarkResult(result); }

  void Emit(bool force = false)
  {
    auto const newCount = m_results.GetCount();
    if (m_prevEmitSize == newCount && !force)
      return;

    LOG(LINFO, ("Emitting a new batch of results:", newCount - m_prevEmitSize, ",", m_timer.ElapsedMilliseconds(),
                "ms since the search has started."));
    m_prevEmitSize = m_results.GetCount();

    m_onResults(m_results);
  }

  Results const & GetResults() const { return m_results; }

  void Finish(bool cancelled)
  {
    m_results.SetEndMarker(cancelled);
    Emit(true /* force */);
  }

private:
  SearchParams::OnResults m_onResults;
  Results m_results;
  size_t m_prevEmitSize = 0;
  base::Timer m_timer;
};
}  // namespace search
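Emitter batches intermediate results and re-sends them only when the result set actually grew; Finish() sets the end marker and forces a last emit. A minimal lifecycle sketch, assuming a caller-provided onResults callback and a placeholder MakeResult():

#include "search/emitter.hpp"

// Sketch of how a processor drives the emitter over one query.
void RunEmitterOnce(search::Emitter & emitter, search::SearchParams::OnResults const & onResults)
{
  emitter.Init(onResults);

  // During the search, for each ranked result (MakeResult() is a placeholder):
  //   if (emitter.AddResult(MakeResult()))
  //     emitter.Emit();  // no-op unless the result count grew since the last emit

  emitter.Finish(false /* cancelled */);  // sets the end marker and forces the final emit
}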
274
libs/search/engine.cpp
Normal file
@@ -0,0 +1,274 @@
#include "search/engine.hpp"
|
||||
|
||||
#include "search/processor.hpp"
|
||||
|
||||
#include "storage/country_info_getter.hpp"
|
||||
|
||||
#include "indexer/categories_holder.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/scope_guard.hpp"
|
||||
#include "base/timer.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace search
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
class InitSuggestions
|
||||
{
|
||||
map<pair<strings::UniString, int8_t>, uint8_t> m_suggests;
|
||||
|
||||
public:
|
||||
void operator()(CategoriesHolder::Category::Name const & name)
|
||||
{
|
||||
if (name.m_prefixLengthToSuggest != CategoriesHolder::Category::kEmptyPrefixLength)
|
||||
{
|
||||
strings::UniString const uniName = NormalizeAndSimplifyString(name.m_name);
|
||||
|
||||
uint8_t & score = m_suggests[make_pair(uniName, name.m_locale)];
|
||||
if (score == 0 || score > name.m_prefixLengthToSuggest)
|
||||
score = name.m_prefixLengthToSuggest;
|
||||
}
|
||||
}
|
||||
|
||||
void GetSuggests(vector<Suggest> & suggests) const
|
||||
{
|
||||
suggests.reserve(suggests.size() + m_suggests.size());
|
||||
for (auto const & s : m_suggests)
|
||||
suggests.emplace_back(s.first.first, s.second, s.first.second);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// ProcessorHandle----------------------------------------------------------------------------------
|
||||
ProcessorHandle::ProcessorHandle() : m_processor(nullptr), m_cancelled(false) {}
|
||||
|
||||
void ProcessorHandle::Cancel()
|
||||
{
|
||||
lock_guard<mutex> lock(m_mu);
|
||||
m_cancelled = true;
|
||||
if (m_processor)
|
||||
m_processor->Cancel();
|
||||
}
|
||||
|
||||
void ProcessorHandle::Attach(Processor & processor)
|
||||
{
|
||||
lock_guard<mutex> lock(m_mu);
|
||||
m_processor = &processor;
|
||||
if (m_cancelled)
|
||||
m_processor->Cancel();
|
||||
}
|
||||
|
||||
void ProcessorHandle::Detach()
|
||||
{
|
||||
lock_guard<mutex> lock(m_mu);
|
||||
m_processor = nullptr;
|
||||
}
|
||||
|
||||
// Engine::Params ----------------------------------------------------------------------------------
|
||||
Engine::Params::Params() : m_locale("en"), m_numThreads(1) {}
|
||||
|
||||
Engine::Params::Params(string const & locale, size_t numThreads) : m_locale(locale), m_numThreads(numThreads) {}
|
||||
|
||||
// Engine ------------------------------------------------------------------------------------------
|
||||
Engine::Engine(DataSource & dataSource, CategoriesHolder const & categories,
|
||||
storage::CountryInfoGetter const & infoGetter, Params const & params)
|
||||
: m_shutdown(false)
|
||||
{
|
||||
InitSuggestions doInit;
|
||||
categories.ForEachName(doInit);
|
||||
doInit.GetSuggests(m_suggests);
|
||||
|
||||
m_contexts.resize(params.m_numThreads);
|
||||
for (size_t i = 0; i < params.m_numThreads; ++i)
|
||||
{
|
||||
auto processor = make_unique<Processor>(dataSource, categories, m_suggests, infoGetter);
|
||||
processor->SetPreferredLocale(params.m_locale);
|
||||
m_contexts[i].m_processor = std::move(processor);
|
||||
}
|
||||
|
||||
m_threads.reserve(params.m_numThreads);
|
||||
for (size_t i = 0; i < params.m_numThreads; ++i)
|
||||
m_threads.emplace_back(&Engine::MainLoop, this, ref(m_contexts[i]));
|
||||
|
||||
CacheWorldLocalities();
|
||||
LoadCitiesBoundaries();
|
||||
LoadCountriesTree();
|
||||
}
|
||||
|
||||
Engine::~Engine()
|
||||
{
|
||||
{
|
||||
lock_guard<mutex> lock(m_mu);
|
||||
m_shutdown = true;
|
||||
m_cv.notify_all();
|
||||
}
|
||||
|
||||
for (auto & thread : m_threads)
|
||||
thread.join();
|
||||
}
|
||||
|
||||
weak_ptr<ProcessorHandle> Engine::Search(SearchParams params)
|
||||
{
|
||||
shared_ptr<ProcessorHandle> handle(new ProcessorHandle());
|
||||
PostMessage(Message::TYPE_TASK, [this, params = std::move(params), handle](Processor & processor)
|
||||
{ DoSearch(std::move(params), handle, processor); });
|
||||
return handle;
|
||||
}
|
||||
|
||||
void Engine::SetLocale(string const & locale)
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST, [locale](Processor & processor) { processor.SetPreferredLocale(locale); });
|
||||
}
|
||||
|
||||
size_t Engine::GetNumThreads() const
|
||||
{
|
||||
return m_threads.size();
|
||||
}
|
||||
|
||||
void Engine::ClearCaches()
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.ClearCaches(); });
|
||||
}
|
||||
|
||||
void Engine::CacheWorldLocalities()
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.CacheWorldLocalities(); });
|
||||
}
|
||||
|
||||
void Engine::LoadCitiesBoundaries()
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.LoadCitiesBoundaries(); });
|
||||
}
|
||||
|
||||
void Engine::LoadCountriesTree()
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.LoadCountriesTree(); });
|
||||
}
|
||||
|
||||
void Engine::EnableIndexingOfBookmarksDescriptions(bool enable)
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST,
|
||||
[enable](Processor & processor) { processor.EnableIndexingOfBookmarksDescriptions(enable); });
|
||||
}
|
||||
|
||||
void Engine::EnableIndexingOfBookmarkGroup(bookmarks::GroupId const & groupId, bool enable)
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST,
|
||||
[=](Processor & processor) { processor.EnableIndexingOfBookmarkGroup(groupId, enable); });
|
||||
}
|
||||
|
||||
void Engine::ResetBookmarks()
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.ResetBookmarks(); });
|
||||
}
|
||||
|
||||
void Engine::OnBookmarksCreated(vector<pair<bookmarks::Id, bookmarks::Doc>> const & marks)
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST, [marks](Processor & processor) { processor.OnBookmarksCreated(marks); });
|
||||
}
|
||||
|
||||
void Engine::OnBookmarksUpdated(vector<pair<bookmarks::Id, bookmarks::Doc>> const & marks)
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST, [marks](Processor & processor) { processor.OnBookmarksUpdated(marks); });
|
||||
}
|
||||
|
||||
void Engine::OnBookmarksDeleted(vector<bookmarks::Id> const & marks)
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST, [marks](Processor & processor) { processor.OnBookmarksDeleted(marks); });
|
||||
}
|
||||
|
||||
void Engine::OnBookmarksAttachedToGroup(bookmarks::GroupId const & groupId, vector<bookmarks::Id> const & marks)
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST,
|
||||
[groupId, marks](Processor & processor) { processor.OnBookmarksAttachedToGroup(groupId, marks); });
|
||||
}
|
||||
|
||||
void Engine::OnBookmarksDetachedFromGroup(bookmarks::GroupId const & groupId, vector<bookmarks::Id> const & marks)
|
||||
{
|
||||
PostMessage(Message::TYPE_BROADCAST,
|
||||
[groupId, marks](Processor & processor) { processor.OnBookmarksDetachedFromGroup(groupId, marks); });
|
||||
}
|
||||
|
||||
void Engine::MainLoop(Context & context)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
bool hasBroadcast = false;
|
||||
queue<Message> messages;
|
||||
|
||||
{
|
||||
unique_lock<mutex> lock(m_mu);
|
||||
m_cv.wait(lock, [&]() { return m_shutdown || !m_messages.empty() || !context.m_messages.empty(); });
|
||||
|
||||
if (m_shutdown)
|
||||
break;
|
||||
|
||||
// As SearchEngine is thread-safe, there is a global order on
|
||||
// public API requests, and this order is kept by the global
|
||||
// |m_messages| queue. When a broadcast message arrives, it
|
||||
// must be executed in any case by all threads, therefore the
|
||||
// first free thread extracts as many as possible broadcast
|
||||
// messages from |m_messages| front and replicates them to all
|
||||
// thread-specific |m_messages| queues.
|
||||
while (!m_messages.empty() && m_messages.front().m_type == Message::TYPE_BROADCAST)
|
||||
{
|
||||
for (auto & b : m_contexts)
|
||||
b.m_messages.push(m_messages.front());
|
||||
m_messages.pop();
|
||||
hasBroadcast = true;
|
||||
}
|
||||
|
||||
// Consumes the first non-broadcast message, if any. We process
// only a single task message (in contrast with broadcast
// messages) because task messages are actually search queries,
// whose processing may take an arbitrary amount of time. So
// it's better to process only one message and leave the rest to the
// next free search thread.
|
||||
if (!m_messages.empty())
|
||||
{
|
||||
context.m_messages.push(std::move(m_messages.front()));
|
||||
m_messages.pop();
|
||||
}
|
||||
|
||||
messages.swap(context.m_messages);
|
||||
}
|
||||
|
||||
if (hasBroadcast)
|
||||
m_cv.notify_all();
|
||||
|
||||
while (!messages.empty())
|
||||
{
|
||||
messages.front()(*context.m_processor);
|
||||
messages.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
void Engine::PostMessage(Args &&... args)
|
||||
{
|
||||
lock_guard<mutex> lock(m_mu);
|
||||
m_messages.emplace(std::forward<Args>(args)...);
|
||||
m_cv.notify_one();
|
||||
}
|
||||
|
||||
void Engine::DoSearch(SearchParams params, shared_ptr<ProcessorHandle> handle, Processor & processor)
|
||||
{
|
||||
LOG(LINFO, ("Search started:", params.m_mode, params.m_viewport));
|
||||
base::Timer timer;
|
||||
SCOPE_GUARD(printDuration, [&timer]() { LOG(LINFO, ("Search ended in", timer.ElapsedMilliseconds(), "ms.")); });
|
||||
|
||||
processor.Reset();
|
||||
handle->Attach(processor);
|
||||
SCOPE_GUARD(detach, [&handle] { handle->Detach(); });
|
||||
|
||||
processor.Search(std::move(params));
|
||||
}
|
||||
} // namespace search
|
||||
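From the caller's side the engine is a simple queue: Search() posts a task and hands back a weak handle that can only cancel. A usage sketch, assuming dataSource, categories and infoGetter outlive the engine (the constructor comment states ownership is not taken):

#include "search/engine.hpp"
#include "search/search_params.hpp"

#include <memory>
#include <utility>

// Post a query and cancel it through the weak handle, e.g. when the user edits the query.
void SearchAndMaybeCancel(DataSource & dataSource, CategoriesHolder const & categories,
                          storage::CountryInfoGetter const & infoGetter, search::SearchParams params)
{
  search::Engine engine(dataSource, categories, infoGetter, search::Engine::Params("en", 1 /* numThreads */));

  std::weak_ptr<search::ProcessorHandle> handle = engine.Search(std::move(params));

  // The handle stays valid only while the engine still owns the request.
  if (auto h = handle.lock())
    h->Cancel();
}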
181
libs/search/engine.hpp
Normal file
@@ -0,0 +1,181 @@
#pragma once
|
||||
|
||||
#include "search/search_params.hpp"
|
||||
#include "search/suggest.hpp"
|
||||
|
||||
#include "indexer/categories_holder.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
#include "base/thread.hpp"
|
||||
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class DataSource;
|
||||
|
||||
namespace storage
|
||||
{
|
||||
class CountryInfoGetter;
|
||||
}
|
||||
|
||||
namespace search
|
||||
{
|
||||
class EngineData;
|
||||
class Processor;
|
||||
|
||||
// This class is used as a reference to a search processor in the
|
||||
// SearchEngine's queue. It's only possible to cancel a search
|
||||
// request via this reference.
|
||||
//
|
||||
// NOTE: this class is thread-safe.
|
||||
class ProcessorHandle
|
||||
{
|
||||
public:
|
||||
ProcessorHandle();
|
||||
|
||||
// Cancels processor this handle points to.
|
||||
void Cancel();
|
||||
|
||||
private:
|
||||
friend class Engine;
|
||||
|
||||
// Attaches the handle to a |processor|. If there was or will be a
|
||||
// cancel signal, this signal will be propagated to |processor|.
|
||||
// This method is called only once, when search engine starts
|
||||
// the processor this handle corresponds to.
|
||||
void Attach(Processor & processor);
|
||||
|
||||
// Detaches handle from a processor. This method is called only
|
||||
// once, when search engine completes processing of the query
|
||||
// that this handle corresponds to.
|
||||
void Detach();
|
||||
|
||||
Processor * m_processor;
|
||||
bool m_cancelled;
|
||||
std::mutex m_mu;
|
||||
|
||||
DISALLOW_COPY_AND_MOVE(ProcessorHandle);
|
||||
};
|
||||
|
||||
// This class is a wrapper around thread which processes search
|
||||
// queries one by one.
|
||||
//
|
||||
// NOTE: this class is thread safe.
|
||||
class Engine
|
||||
{
|
||||
public:
|
||||
struct Params
|
||||
{
|
||||
Params();
|
||||
Params(std::string const & locale, size_t numThreads);
|
||||
|
||||
std::string m_locale;
|
||||
|
||||
// This field controls number of threads SearchEngine will create
|
||||
// to process queries. Use this field wisely as large values may
|
||||
// negatively affect performance due to false sharing.
|
||||
size_t m_numThreads;
|
||||
};
|
||||
|
||||
// Doesn't take ownership of dataSource and categories.
|
||||
Engine(DataSource & dataSource, CategoriesHolder const & categories, storage::CountryInfoGetter const & infoGetter,
|
||||
Params const & params);
|
||||
~Engine();
|
||||
|
||||
// Posts search request to the queue and returns its handle.
|
||||
std::weak_ptr<ProcessorHandle> Search(SearchParams params);
|
||||
|
||||
// Sets default locale on all query processors.
|
||||
void SetLocale(std::string const & locale);
|
||||
|
||||
// Returns the number of request-processing threads.
|
||||
size_t GetNumThreads() const;
|
||||
|
||||
// Posts request to clear caches to the queue.
|
||||
void ClearCaches();
|
||||
|
||||
// Posts requests to load and cache localities from World.mwm.
|
||||
void CacheWorldLocalities();
|
||||
|
||||
// Posts request to reload cities boundaries tables.
|
||||
void LoadCitiesBoundaries();
|
||||
|
||||
// Posts request to load countries tree.
|
||||
void LoadCountriesTree();
|
||||
|
||||
void EnableIndexingOfBookmarksDescriptions(bool enable);
|
||||
void EnableIndexingOfBookmarkGroup(bookmarks::GroupId const & groupId, bool enable);
|
||||
|
||||
// Clears all bookmarks data and caches for all processors.
|
||||
void ResetBookmarks();
|
||||
|
||||
void OnBookmarksCreated(std::vector<std::pair<bookmarks::Id, bookmarks::Doc>> const & marks);
|
||||
void OnBookmarksUpdated(std::vector<std::pair<bookmarks::Id, bookmarks::Doc>> const & marks);
|
||||
void OnBookmarksDeleted(std::vector<bookmarks::Id> const & marks);
|
||||
void OnBookmarksAttachedToGroup(bookmarks::GroupId const & groupId, std::vector<bookmarks::Id> const & marks);
|
||||
void OnBookmarksDetachedFromGroup(bookmarks::GroupId const & groupId, std::vector<bookmarks::Id> const & marks);
|
||||
|
||||
private:
|
||||
struct Message
|
||||
{
|
||||
using Fn = std::function<void(Processor & processor)>;
|
||||
|
||||
enum Type
|
||||
{
|
||||
TYPE_TASK,
|
||||
TYPE_BROADCAST
|
||||
};
|
||||
|
||||
template <typename Gn>
|
||||
Message(Type type, Gn && gn) : m_type(type)
|
||||
, m_fn(std::forward<Gn>(gn))
|
||||
{}
|
||||
|
||||
void operator()(Processor & processor) { m_fn(processor); }
|
||||
|
||||
Type m_type;
|
||||
Fn m_fn;
|
||||
};
|
||||
|
||||
struct Context
|
||||
{
|
||||
// This field *CAN* be accessed by other threads, so |m_mu| must
|
||||
// be taken before access this queue. Messages are ordered here
|
||||
// by a timestamp and all timestamps are less than timestamps in
|
||||
// the global |m_messages| queue.
|
||||
std::queue<Message> m_messages;
|
||||
|
||||
// This field is thread-specific and *CAN NOT* be accessed by
|
||||
// other threads.
|
||||
std::unique_ptr<Processor> m_processor;
|
||||
};
|
||||
|
||||
// *ALL* following methods are executed on the m_threads threads.
|
||||
|
||||
// This method executes tasks from a common pool (|tasks|) in a FIFO
|
||||
// manner. |broadcast| contains per-thread tasks, but nevertheless
|
||||
// all necessary synchronization primitives must be used to access
|
||||
// |tasks| and |broadcast|.
|
||||
void MainLoop(Context & context);
|
||||
|
||||
template <typename... Args>
|
||||
void PostMessage(Args &&... args);
|
||||
|
||||
void DoSearch(SearchParams params, std::shared_ptr<ProcessorHandle> handle, Processor & processor);
|
||||
|
||||
std::vector<Suggest> m_suggests;
|
||||
|
||||
bool m_shutdown;
|
||||
std::mutex m_mu;
|
||||
std::condition_variable m_cv;
|
||||
|
||||
std::queue<Message> m_messages;
|
||||
std::vector<Context> m_contexts;
|
||||
std::vector<threads::SimpleThread> m_threads;
|
||||
};
|
||||
} // namespace search
|
||||
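The dispatch rule described in Engine::MainLoop is worth seeing in isolation: broadcast messages are replicated into every per-thread queue, while a task message goes to exactly one free thread, because tasks are whole search queries and may run long. A simplified standalone model of that rule (not the engine's code, and without the locking the real loop needs):

#include <functional>
#include <queue>
#include <utility>
#include <vector>

struct Msg
{
  enum Type { Task, Broadcast } m_type;
  std::function<void()> m_fn;
};

// |global| is the ordered queue of public API requests; |perThread| holds each worker's queue.
void Dispatch(std::queue<Msg> & global, std::vector<std::queue<Msg>> & perThread, size_t freeThread)
{
  // Replicate all leading broadcasts to every thread so each processor sees them.
  while (!global.empty() && global.front().m_type == Msg::Broadcast)
  {
    for (auto & q : perThread)
      q.push(global.front());
    global.pop();
  }

  // Hand at most one task to the free thread; remaining tasks wait for the next free thread.
  if (!global.empty())
  {
    perThread[freeThread].push(std::move(global.front()));
    global.pop();
  }
}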
27
libs/search/feature_loader.cpp
Normal file
@@ -0,0 +1,27 @@
#include "search/feature_loader.hpp"

#include "editor/editable_data_source.hpp"

#include "indexer/feature.hpp"
#include "indexer/feature_decl.hpp"

namespace search
{
FeatureLoader::FeatureLoader(DataSource const & dataSource) : m_dataSource(dataSource) {}

std::unique_ptr<FeatureType> FeatureLoader::Load(FeatureID const & id)
{
  ASSERT(m_checker.CalledOnOriginalThread(), ());

  auto const & mwmId = id.m_mwmId;
  if (!m_guard || m_guard->GetId() != mwmId)
    m_guard = std::make_unique<FeaturesLoaderGuard>(m_dataSource, mwmId);
  return m_guard->GetFeatureByIndex(id.m_index);
}

void FeatureLoader::Reset()
{
  ASSERT(m_checker.CalledOnOriginalThread(), ());
  m_guard.reset();
}
}  // namespace search
39
libs/search/feature_loader.hpp
Normal file
@@ -0,0 +1,39 @@
#pragma once

#include "indexer/data_source.hpp"
#include "indexer/scales.hpp"

#include "base/assert.hpp"
#include "base/macros.hpp"
#include "base/thread_checker.hpp"

#include <functional>
#include <memory>
#include <utility>

class FeatureType;
struct FeatureID;

namespace search
{
class FeatureLoader
{
public:
  explicit FeatureLoader(DataSource const & dataSource);

  std::unique_ptr<FeatureType> Load(FeatureID const & id);

  void Reset();

  void ForEachInRect(m2::RectD const & rect, std::function<void(FeatureType &)> const & fn)
  {
    ASSERT(m_checker.CalledOnOriginalThread(), ());
    m_dataSource.ForEachInRect(fn, rect, scales::GetUpperScale());
  }

private:
  DataSource const & m_dataSource;
  std::unique_ptr<FeaturesLoaderGuard> m_guard;

  ThreadChecker m_checker;
};
}  // namespace search
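Load() keeps one FeaturesLoaderGuard alive for the most recently used mwm, so feeding ids grouped by mwm avoids reopening the same map file over and over. A short usage sketch, assuming FeatureID's ordering groups ids by mwm first (which is what makes the sort worthwhile):

#include "search/feature_loader.hpp"

#include <algorithm>
#include <vector>

void LoadAll(search::FeatureLoader & loader, std::vector<FeatureID> ids)
{
  std::sort(ids.begin(), ids.end());  // assumption: FeatureIDs compare by mwm first, then index
  for (auto const & id : ids)
    if (auto ft = loader.Load(id))
    {
      // ... use *ft; consecutive ids from the same mwm reuse the cached guard ...
    }
}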
336
libs/search/feature_offset_match.hpp
Normal file
@@ -0,0 +1,336 @@
#pragma once
|
||||
|
||||
#include "search/query_params.hpp"
|
||||
#include "search/search_index_values.hpp"
|
||||
#include "search/search_trie.hpp"
|
||||
#include "search/token_slice.hpp"
|
||||
|
||||
#include "indexer/trie.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/dfa_helpers.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
#include "base/uni_string_dfa.hpp"
|
||||
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace impl
|
||||
{
|
||||
template <typename ValueList>
|
||||
bool FindLangIndex(trie::Iterator<ValueList> const & trieRoot, uint8_t lang, uint32_t & langIx)
|
||||
{
|
||||
ASSERT_LESS(trieRoot.m_edges.size(), std::numeric_limits<uint32_t>::max(), ());
|
||||
|
||||
uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edges.size());
|
||||
for (uint32_t i = 0; i < numLangs; ++i)
|
||||
{
|
||||
auto const & edge = trieRoot.m_edges[i].m_label;
|
||||
ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());
|
||||
if (edge[0] == lang)
|
||||
{
|
||||
langIx = i;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename ValueList, typename DFA, typename ToDo>
|
||||
bool MatchInTrie(trie::Iterator<ValueList> const & trieRoot, strings::UniChar const * rootPrefix, size_t rootPrefixSize,
|
||||
DFA const & dfa, ToDo && toDo)
|
||||
{
|
||||
using TrieDFAIt = std::shared_ptr<trie::Iterator<ValueList>>;
|
||||
using DFAIt = typename DFA::Iterator;
|
||||
using State = std::pair<TrieDFAIt, DFAIt>;
|
||||
|
||||
std::queue<State> q;
|
||||
|
||||
{
|
||||
auto it = dfa.Begin();
|
||||
DFAMove(it, rootPrefix, rootPrefix + rootPrefixSize);
|
||||
if (it.Rejects())
|
||||
return false;
|
||||
q.emplace(trieRoot.Clone(), it);
|
||||
}
|
||||
|
||||
bool found = false;
|
||||
|
||||
while (!q.empty())
|
||||
{
|
||||
auto const p = q.front();
|
||||
q.pop();
|
||||
|
||||
auto const & trieIt = p.first;
|
||||
auto const & dfaIt = p.second;
|
||||
|
||||
if (dfaIt.Accepts())
|
||||
{
|
||||
trieIt->m_values.ForEach([&dfaIt, &toDo](auto const & v) { toDo(v, dfaIt.ErrorsMade() == 0); });
|
||||
found = true;
|
||||
}
|
||||
|
||||
size_t const numEdges = trieIt->m_edges.size();
|
||||
for (size_t i = 0; i < numEdges; ++i)
|
||||
{
|
||||
auto const & edge = trieIt->m_edges[i];
|
||||
|
||||
auto curIt = dfaIt;
|
||||
strings::DFAMove(curIt, edge.m_label.begin(), edge.m_label.end());
|
||||
if (!curIt.Rejects())
|
||||
q.emplace(trieIt->GoToEdge(i), curIt);
|
||||
}
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
template <typename Filter, typename Value>
|
||||
class OffsetIntersector
|
||||
{
|
||||
using Values = std::unordered_map<Value, bool>;
|
||||
|
||||
Filter const & m_filter;
|
||||
std::unique_ptr<Values> m_prevValues;
|
||||
std::unique_ptr<Values> m_values;
|
||||
|
||||
public:
|
||||
explicit OffsetIntersector(Filter const & filter) : m_filter(filter), m_values(std::make_unique<Values>()) {}
|
||||
|
||||
void operator()(Value const & v, bool exactMatch)
|
||||
{
|
||||
if (m_prevValues && !m_prevValues->count(v))
|
||||
return;
|
||||
|
||||
if (m_filter(v))
|
||||
{
|
||||
auto res = m_values->emplace(v, exactMatch);
|
||||
if (!res.second)
|
||||
res.first->second = res.first->second || exactMatch;
|
||||
}
|
||||
}
|
||||
|
||||
void NextStep()
|
||||
{
|
||||
if (!m_prevValues)
|
||||
m_prevValues = std::make_unique<Values>();
|
||||
|
||||
m_prevValues.swap(m_values);
|
||||
m_values->clear();
|
||||
}
|
||||
|
||||
template <class ToDo>
|
||||
void ForEachResult(ToDo && toDo) const
|
||||
{
|
||||
if (!m_prevValues)
|
||||
return;
|
||||
for (auto const & value : *m_prevValues)
|
||||
toDo(value.first, value.second);
|
||||
}
|
||||
};
|
||||
} // namespace impl
|
||||
|
||||
template <typename ValueList>
|
||||
struct TrieRootPrefix
|
||||
{
|
||||
using Value = typename ValueList::Value;
|
||||
using Iterator = trie::Iterator<ValueList>;
|
||||
|
||||
Iterator const & m_root;
|
||||
strings::UniChar const * m_prefix;
|
||||
size_t m_prefixSize;
|
||||
|
||||
TrieRootPrefix(Iterator const & root, typename Iterator::Edge::EdgeLabel const & edge) : m_root(root)
|
||||
{
|
||||
if (edge.size() == 1)
|
||||
{
|
||||
m_prefix = 0;
|
||||
m_prefixSize = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_prefix = &edge[1];
|
||||
m_prefixSize = edge.size() - 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Filter, typename Value>
|
||||
class TrieValuesHolder
|
||||
{
|
||||
public:
|
||||
TrieValuesHolder(Filter const & filter) : m_filter(filter) {}
|
||||
|
||||
void operator()(Value const & v, bool exactMatch)
|
||||
{
|
||||
if (m_filter(v))
|
||||
m_values.emplace_back(v, exactMatch);
|
||||
}
|
||||
|
||||
template <class ToDo>
|
||||
void ForEachValue(ToDo && toDo) const
|
||||
{
|
||||
for (auto const & value : m_values)
|
||||
toDo(value.first, value.second);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::pair<Value, bool>> m_values;
|
||||
Filter const & m_filter;
|
||||
};
|
||||
|
||||
template <typename DFA>
|
||||
struct SearchTrieRequest
|
||||
{
|
||||
SearchTrieRequest() = default;
|
||||
|
||||
SearchTrieRequest(SearchTrieRequest &&) = default;
|
||||
SearchTrieRequest & operator=(SearchTrieRequest &&) = default;
|
||||
|
||||
template <typename Langs>
|
||||
void SetLangs(Langs const & langs)
|
||||
{
|
||||
m_langs.clear();
|
||||
for (auto const lang : langs)
|
||||
if (lang >= 0 && lang <= std::numeric_limits<int8_t>::max())
|
||||
m_langs.insert(static_cast<int8_t>(lang));
|
||||
}
|
||||
|
||||
bool HasLang(int8_t lang) const { return m_langs.find(lang) != m_langs.cend(); }
|
||||
|
||||
void Clear()
|
||||
{
|
||||
m_names.clear();
|
||||
m_categories.clear();
|
||||
m_langs.clear();
|
||||
}
|
||||
|
||||
std::vector<DFA> m_names;
|
||||
std::vector<strings::UniStringDFA> m_categories;
|
||||
|
||||
// Set of languages, will be prepended to all DFAs in |m_names|
|
||||
// during retrieval from a search index. Semantics of this field
|
||||
// depends on the search index, for example this can be a set of
|
||||
// langs from StringUtf8Multilang, or a set of locale indices.
|
||||
std::unordered_set<int8_t> m_langs;
|
||||
};
|
||||
|
||||
// Calls |toDo| for each feature accepted by at least one DFA.
|
||||
//
|
||||
// *NOTE* |toDo| may be called several times for the same feature.
|
||||
template <typename DFA, typename ValueList, typename ToDo>
|
||||
void MatchInTrie(std::vector<DFA> const & dfas, TrieRootPrefix<ValueList> const & trieRoot, ToDo && toDo)
|
||||
{
|
||||
for (auto const & dfa : dfas)
|
||||
impl::MatchInTrie(trieRoot.m_root, trieRoot.m_prefix, trieRoot.m_prefixSize, dfa, toDo);
|
||||
}
|
||||
|
||||
// Calls |toDo| for each feature in categories branch matching to |request|.
|
||||
//
|
||||
// *NOTE* |toDo| may be called several times for the same feature.
|
||||
template <typename DFA, typename ValueList, typename ToDo>
|
||||
bool MatchCategoriesInTrie(SearchTrieRequest<DFA> const & request, trie::Iterator<ValueList> const & trieRoot,
|
||||
ToDo && toDo)
|
||||
{
|
||||
uint32_t langIx = 0;
|
||||
if (!impl::FindLangIndex(trieRoot, search::kCategoriesLang, langIx))
|
||||
return false;
|
||||
|
||||
auto const & edge = trieRoot.m_edges[langIx].m_label;
|
||||
ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());
|
||||
|
||||
auto const catRoot = trieRoot.GoToEdge(langIx);
|
||||
MatchInTrie(request.m_categories, TrieRootPrefix<ValueList>(*catRoot, edge), toDo);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Calls |toDo| with trie root prefix and language code on each
|
||||
// language allowed by |request|.
|
||||
template <typename DFA, typename ValueList, typename ToDo>
|
||||
void ForEachLangPrefix(SearchTrieRequest<DFA> const & request, trie::Iterator<ValueList> const & trieRoot, ToDo && toDo)
|
||||
{
|
||||
ASSERT_LESS(trieRoot.m_edges.size(), std::numeric_limits<uint32_t>::max(), ());
|
||||
|
||||
uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edges.size());
|
||||
for (uint32_t langIx = 0; langIx < numLangs; ++langIx)
|
||||
{
|
||||
auto const & edge = trieRoot.m_edges[langIx].m_label;
|
||||
ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());
|
||||
int8_t const lang = static_cast<int8_t>(edge[0]);
|
||||
if (edge[0] < search::kCategoriesLang && request.HasLang(lang))
|
||||
{
|
||||
auto const langRoot = trieRoot.GoToEdge(langIx);
|
||||
TrieRootPrefix<ValueList> langPrefix(*langRoot, edge);
|
||||
toDo(langPrefix, lang);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calls |toDo| for each feature whose description matches to
|
||||
// |request|. Each feature will be passed to |toDo| only once.
|
||||
template <typename DFA, typename ValueList, typename Filter, typename ToDo>
|
||||
void MatchFeaturesInTrie(SearchTrieRequest<DFA> const & request, trie::Iterator<ValueList> const & trieRoot,
|
||||
Filter const & filter, ToDo && toDo)
|
||||
{
|
||||
using Value = typename ValueList::Value;
|
||||
|
||||
TrieValuesHolder<Filter, Value> categoriesHolder(filter);
|
||||
bool const categoriesExist = MatchCategoriesInTrie(request, trieRoot, categoriesHolder);
|
||||
|
||||
/// @todo Not sure why we have OffsetIntersector here? We are doing aggregation only.
|
||||
impl::OffsetIntersector<Filter, Value> intersector(filter);
|
||||
|
||||
ForEachLangPrefix(request, trieRoot, [&request, &intersector](TrieRootPrefix<ValueList> & langRoot, int8_t /* lang */)
|
||||
{
|
||||
// Aggregate for all languages.
|
||||
MatchInTrie(request.m_names, langRoot, intersector);
|
||||
});
|
||||
|
||||
if (categoriesExist)
|
||||
{
|
||||
// Aggregate categories.
|
||||
categoriesHolder.ForEachValue(intersector);
|
||||
}
|
||||
|
||||
intersector.NextStep();
|
||||
intersector.ForEachResult(toDo);
|
||||
}
|
||||
|
||||
template <typename ValueList, typename Filter, typename ToDo>
|
||||
void MatchPostcodesInTrie(TokenSlice const & slice, trie::Iterator<ValueList> const & trieRoot, Filter const & filter,
|
||||
ToDo && toDo)
|
||||
{
|
||||
using namespace strings;
|
||||
using Value = typename ValueList::Value;
|
||||
|
||||
uint32_t langIx = 0;
|
||||
if (!impl::FindLangIndex(trieRoot, search::kPostcodesLang, langIx))
|
||||
return;
|
||||
|
||||
auto const & edge = trieRoot.m_edges[langIx].m_label;
|
||||
auto const postcodesRoot = trieRoot.GoToEdge(langIx);
|
||||
|
||||
impl::OffsetIntersector<Filter, Value> intersector(filter);
|
||||
for (size_t i = 0; i < slice.Size(); ++i)
|
||||
{
|
||||
// Full match is required even for the prefix token. Reasons:
// 1. For a postcode every symbol is important; partial matching can lead to wrong results.
// 2. For a prefix-match query like "streetname 40", where |streetname| is located in a 40xxx
//    postcode zone, the whole street vicinity would be returned as the result, which is wrong.
|
||||
std::vector<UniStringDFA> dfas;
|
||||
slice.Get(i).ForOriginalAndSynonyms([&dfas](UniString const & s) { dfas.emplace_back(s); });
|
||||
MatchInTrie(dfas, TrieRootPrefix<ValueList>(*postcodesRoot, edge), intersector);
|
||||
|
||||
intersector.NextStep();
|
||||
}
|
||||
|
||||
intersector.ForEachResult(toDo);
|
||||
}
|
||||
} // namespace search
|
||||
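The entry point callers actually use is MatchFeaturesInTrie with a SearchTrieRequest; the DFAs in m_names decide how fuzzy the name match is. A hedged sketch of building the simplest possible request, an exact (zero-error) name match via UniStringDFA; the language index 0 and the pass-all filter are assumptions for illustration:

#include "search/feature_offset_match.hpp"

#include "base/string_utils.hpp"
#include "base/uni_string_dfa.hpp"

#include <string>
#include <vector>

// toDo is called as toDo(value, exactMatch) for every matched posting, at most once per feature.
template <typename ValueList, typename ToDo>
void MatchExactName(std::string const & query, trie::Iterator<ValueList> const & trieRoot, ToDo && toDo)
{
  search::SearchTrieRequest<strings::UniStringDFA> request;
  request.m_names.emplace_back(strings::MakeUniString(query));
  request.SetLangs(std::vector<int8_t>{0});  // assumption: 0 is a valid language index here

  search::MatchFeaturesInTrie(request, trieRoot, [](auto const &) { return true; } /* filter */, toDo);
}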
36
libs/search/features_filter.cpp
Normal file
@@ -0,0 +1,36 @@
#include "search/features_filter.hpp"

#include "search/cbv.hpp"

namespace search
{
// FeaturesFilter ----------------------------------------------------------------------------------
FeaturesFilter::FeaturesFilter(CBV const & filter, uint64_t threshold) : m_filter(filter), m_threshold(threshold) {}

bool FeaturesFilter::NeedToFilter(CBV const & cbv) const
{
  if (cbv.IsFull())
    return true;
  return cbv.PopCount() > m_threshold;
}

// LocalityFilter ----------------------------------------------------------------------------------
LocalityFilter::LocalityFilter(CBV const & filter) : FeaturesFilter(filter, 0 /* threshold */) {}

CBV LocalityFilter::Filter(CBV const & cbv) const
{
  return m_filter.Intersect(cbv);
}

// ViewportFilter ----------------------------------------------------------------------------------
ViewportFilter::ViewportFilter(CBV const & filter, uint64_t threshold) : FeaturesFilter(filter, threshold) {}

CBV ViewportFilter::Filter(CBV const & cbv) const
{
  auto result = m_filter.Intersect(cbv);
  if (!result.IsEmpty())
    return result;

  return cbv.Take(m_threshold);
}
}  // namespace search
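The intended calling pattern is to consult NeedToFilter() first, so small candidate sets skip the intersection entirely. A minimal sketch of that pattern, assuming CBV is cheap to copy by value as used here:

#include "search/cbv.hpp"
#include "search/features_filter.hpp"

// Apply the filter only when the candidate set is full or larger than the threshold.
search::CBV ApplyIfNeeded(search::FeaturesFilter const & filter, search::CBV const & candidates)
{
  if (!filter.NeedToFilter(candidates))
    return candidates;
  return filter.Filter(candidates);
}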
52
libs/search/features_filter.hpp
Normal file
@@ -0,0 +1,52 @@
#pragma once

#include <cstdint>

namespace search
{
class CBV;

// A lightweight filter of features.
//
// NOTE: this class and its subclasses *ARE* thread-safe.
class FeaturesFilter
{
public:
  FeaturesFilter(CBV const & filter, uint64_t threshold);

  virtual ~FeaturesFilter() = default;

  bool NeedToFilter(CBV const & features) const;

  virtual CBV Filter(CBV const & cbv) const = 0;

protected:
  CBV const & m_filter;
  uint64_t const m_threshold;
};

// Exact filter - leaves only features belonging to the set it was
// constructed from.
class LocalityFilter : public FeaturesFilter
{
public:
  LocalityFilter(CBV const & filter);

  // FeaturesFilter overrides:
  CBV Filter(CBV const & cbv) const override;
};

// Fuzzy filter - tries to leave only features belonging to the set it
// was constructed from, but if the result is empty, leaves at most
// the first |threshold| features instead. This property is quite useful
// when there are no matching features in the viewport but it's ok to
// process a limited number of features outside the viewport.
class ViewportFilter : public FeaturesFilter
{
public:
  ViewportFilter(CBV const & filter, uint64_t threshold);

  // FeaturesFilter overrides:
  CBV Filter(CBV const & cbv) const override;
};
}  // namespace search
35
libs/search/features_layer.cpp
Normal file
@@ -0,0 +1,35 @@
#include "search/features_layer.hpp"

#include "base/internal/message.hpp"

#include <iostream>
#include <sstream>

using namespace std;

namespace search
{
FeaturesLayer::FeaturesLayer()
{
  Clear();
}

void FeaturesLayer::Clear()
{
  m_sortedFeatures = nullptr;
  m_subQuery.clear();
  m_tokenRange.Clear();
  m_type = Model::TYPE_COUNT;
  m_hasDelayedFeatures = false;
  m_lastTokenIsPrefix = false;
}

string DebugPrint(FeaturesLayer const & layer)
{
  ostringstream os;
  os << "FeaturesLayer [size of m_sortedFeatures: " << (layer.m_sortedFeatures ? layer.m_sortedFeatures->size() : 0)
     << ", subquery: " << DebugPrint(layer.m_subQuery) << ", tokenRange: " << DebugPrint(layer.m_tokenRange)
     << ", type: " << DebugPrint(layer.m_type) << ", lastTokenIsPrefix: " << layer.m_lastTokenIsPrefix << "]";
  return os.str();
}
}  // namespace search
43
libs/search/features_layer.hpp
Normal file
@@ -0,0 +1,43 @@
#pragma once

#include "search/cbv.hpp"
#include "search/model.hpp"
#include "search/token_range.hpp"

#include "base/string_utils.hpp"

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

namespace search
{
// This structure represents a part of the search query interpretation -
// when a substring of tokens [m_startToken, m_endToken) is matched
// with a set of m_features of the same m_type.
struct FeaturesLayer
{
  FeaturesLayer();

  void Clear();

  // Non-owning ptr to a sorted vector of features.
  std::vector<uint32_t> const * m_sortedFeatures = nullptr;
  // Fetches the vector of features described by this layer (used for CITY, SUBURB).
  std::function<CBV()> m_getFeatures;

  strings::UniString m_subQuery;
  TokenRange m_tokenRange;
  Model::Type m_type;

  // Meaningful only when m_type equals BUILDING.
  // When true, m_sortedFeatures contains only features retrieved from
  // the search index by m_subQuery, and it's necessary for Geocoder to
  // perform additional work to retrieve features matching by house number.
  bool m_hasDelayedFeatures;

  bool m_lastTokenIsPrefix;
};

std::string DebugPrint(FeaturesLayer const & layer);
}  // namespace search
152
libs/search/features_layer_matcher.cpp
Normal file
@@ -0,0 +1,152 @@
#include "search/features_layer_matcher.hpp"
|
||||
|
||||
#include "search/house_to_street_table.hpp"
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
|
||||
#include "editor/osm_editor.hpp"
|
||||
|
||||
#include "indexer/scales.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace search
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
/// Max distance from house to street where we do search matching
|
||||
/// even if there is no exact street written for this house.
|
||||
int constexpr kMaxApproxStreetDistanceM = 100;
|
||||
|
||||
FeaturesLayerMatcher::FeaturesLayerMatcher(DataSource const & dataSource, base::Cancellable const & cancellable)
|
||||
: m_context(nullptr)
|
||||
, m_postcodes(nullptr)
|
||||
, m_reverseGeocoder(dataSource)
|
||||
, m_nearbyStreetsCache("FeatureToNearbyStreets")
|
||||
, m_matchingStreetsCache("BuildingToStreet")
|
||||
, m_place2address("PlaceToAddresses")
|
||||
, m_loader(scales::GetUpperScale(), ReverseGeocoder::kLookupRadiusM)
|
||||
, m_cancellable(cancellable)
|
||||
{}
|
||||
|
||||
void FeaturesLayerMatcher::SetContext(MwmContext * context)
|
||||
{
|
||||
ASSERT(context, ());
|
||||
if (m_context == context)
|
||||
return;
|
||||
|
||||
m_context = context;
|
||||
m_loader.SetContext(context);
|
||||
}
|
||||
|
||||
void FeaturesLayerMatcher::SetPostcodes(CBV const * postcodes)
|
||||
{
|
||||
m_postcodes = postcodes;
|
||||
}
|
||||
|
||||
void FeaturesLayerMatcher::OnQueryFinished()
|
||||
{
|
||||
m_nearbyStreetsCache.ClearIfNeeded();
|
||||
m_matchingStreetsCache.ClearIfNeeded();
|
||||
m_place2address.ClearIfNeeded();
|
||||
|
||||
m_loader.OnQueryFinished();
|
||||
}
|
||||
|
||||
std::vector<uint32_t> const & FeaturesLayerMatcher::GetPlaceAddrFeatures(uint32_t placeId,
|
||||
std::function<CBV()> const & fn)
|
||||
{
|
||||
ASSERT(fn, ());
|
||||
|
||||
auto const res = m_place2address.Get(placeId);
|
||||
if (res.second)
|
||||
{
|
||||
auto & value = m_context->m_value;
|
||||
if (!value.m_house2place)
|
||||
value.m_house2place = LoadHouseToPlaceTable(value);
|
||||
|
||||
fn().ForEach([&](uint32_t fid)
|
||||
{
|
||||
auto const r = value.m_house2place->Get(fid);
|
||||
if (r && r->m_streetId == placeId)
|
||||
res.first.push_back(fid);
|
||||
});
|
||||
|
||||
ASSERT(base::IsSortedAndUnique(res.first), ());
|
||||
}
|
||||
return res.first;
|
||||
}
|
||||
|
||||
uint32_t FeaturesLayerMatcher::GetMatchingStreet(FeatureID const & houseId)
|
||||
{
|
||||
std::unique_ptr<FeatureType> feature;
|
||||
return GetMatchingStreetImpl(houseId, [&]()
|
||||
{
|
||||
feature = GetByIndex(houseId.m_index);
|
||||
return feature.get();
|
||||
});
|
||||
}
|
||||
|
||||
uint32_t FeaturesLayerMatcher::GetMatchingStreet(FeatureType & feature)
|
||||
{
|
||||
return GetMatchingStreetImpl(feature.GetID(), [&]() { return &feature; });
|
||||
}
|
||||
|
||||
FeaturesLayerMatcher::Streets const & FeaturesLayerMatcher::GetNearbyStreets(FeatureType & feature)
|
||||
{
|
||||
auto entry = m_nearbyStreetsCache.Get(feature.GetID().m_index);
|
||||
if (!entry.second)
|
||||
return entry.first;
|
||||
|
||||
entry.first = m_reverseGeocoder.GetNearbyStreets(feature);
|
||||
return entry.first;
|
||||
}
|
||||
|
||||
template <class FeatureGetterT>
|
||||
uint32_t FeaturesLayerMatcher::GetMatchingStreetImpl(FeatureID const & id, FeatureGetterT && getter)
|
||||
{
|
||||
// Check if this feature is modified - the logic will be different.
|
||||
string streetName;
|
||||
bool const edited = osm::Editor::Instance().GetEditedFeatureStreet(id, streetName);
|
||||
|
||||
// Check the cached result value.
|
||||
auto entry = m_matchingStreetsCache.Get(id.m_index);
|
||||
if (!edited && !entry.second)
|
||||
return entry.first;
|
||||
|
||||
uint32_t & result = entry.first;
|
||||
result = kInvalidId;
|
||||
|
||||
FeatureType * pFeature = getter();
|
||||
if (pFeature == nullptr)
|
||||
return result;
|
||||
|
||||
FeatureID streetId;
|
||||
if (!edited && m_reverseGeocoder.GetOriginalStreetByHouse(*pFeature, streetId))
|
||||
{
|
||||
result = streetId.m_index;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Get nearby streets and calculate the resulting index.
|
||||
auto const & streets = GetNearbyStreets(*pFeature);
|
||||
|
||||
if (edited)
|
||||
{
|
||||
auto const ret =
|
||||
find_if(streets.begin(), streets.end(), [&streetName](Street const & st) { return st.m_name == streetName; });
|
||||
if (ret != streets.end())
|
||||
{
|
||||
result = ret->m_id.m_index;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// If there is no saved street for feature, assume that it's a nearest street if it's too close.
|
||||
if (!streets.empty() && streets[0].m_distanceMeters < kMaxApproxStreetDistanceM)
|
||||
result = streets[0].m_id.m_index;
|
||||
|
||||
return result;
|
||||
}
|
||||
} // namespace search
|
||||
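The last step of GetMatchingStreetImpl above is the interesting one: when a house has no recorded street, the nearest street is accepted only if it lies within kMaxApproxStreetDistanceM (100 m). A simplified standalone model of that fallback, assuming the Street type exposed by ReverseGeocoder and that GetNearbyStreets returns streets sorted by distance:

#include "search/reverse_geocoder.hpp"

#include <cstdint>
#include <limits>
#include <vector>

// Returns the index of an approximate street match, or kInvalidId when none is close enough.
uint32_t PickApproxStreet(std::vector<search::ReverseGeocoder::Street> const & streets)
{
  double constexpr kMaxApproxStreetDistanceM = 100.0;
  uint32_t constexpr kInvalidId = std::numeric_limits<uint32_t>::max();

  if (!streets.empty() && streets[0].m_distanceMeters < kMaxApproxStreetDistanceM)
    return streets[0].m_id.m_index;
  return kInvalidId;
}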
473
libs/search/features_layer_matcher.hpp
Normal file
@@ -0,0 +1,473 @@
#pragma once
|
||||
|
||||
#include "search/cancel_exception.hpp"
|
||||
#include "search/cbv.hpp"
|
||||
#include "search/features_layer.hpp"
|
||||
#include "search/house_numbers_matcher.hpp"
|
||||
#include "search/model.hpp"
|
||||
#include "search/mwm_context.hpp"
|
||||
#include "search/point_rect_matcher.hpp"
|
||||
#include "search/projection_on_street.hpp"
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
#include "search/stats_cache.hpp"
|
||||
#include "search/street_vicinity_loader.hpp"
|
||||
|
||||
#include "indexer/feature.hpp"
|
||||
#include "indexer/feature_algo.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
#include "indexer/mwm_set.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
#include "geometry/point2d.hpp"
|
||||
#include "geometry/rect2d.hpp"
|
||||
|
||||
#include "base/cancellable.hpp"
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
class DataSource;
|
||||
|
||||
namespace search
|
||||
{
|
||||
// This class performs pairwise intersection between two layers of
|
||||
// features, where the first (child) layer is geographically smaller
|
||||
// than the second (parent) one. It emits all pairs
|
||||
// (feature-from-child-layer, feature-from-parent-layer) of matching
|
||||
// features, where feature-from-child-layer belongs-to
|
||||
// feature-from-parent-layer. Belongs-to is a partial relation on
|
||||
// features, and has different meaning for different search classes:
|
||||
//
|
||||
// * BUILDING/POI belongs-to STREET iff it is located on the street;
|
||||
// * BUILDING belongs-to CITY iff the building is located in the city;
|
||||
// * POI belongs-to BUILDING iff the poi is (roughly) located near or inside the building;
|
||||
// * SUBPOI belongs-to COMPLEX_POI iff the SUBPOI is (roughly) located near or inside the COMPLEX_POI;
|
||||
// * STREET belongs-to CITY iff the street is (roughly) located in the city;
|
||||
// * etc.
|
||||
//
|
||||
// NOTE: this class *IS NOT* thread-safe.
|
||||
class FeaturesLayerMatcher
|
||||
{
|
||||
public:
|
||||
static uint32_t constexpr kInvalidId = std::numeric_limits<uint32_t>::max();
|
||||
static int constexpr kBuildingRadiusMeters = 50;
|
||||
static int constexpr kComplexPoiRadiusMeters = 300;
|
||||
static int constexpr kStreetRadiusMeters = 100;
|
||||
|
||||
FeaturesLayerMatcher(DataSource const & dataSource, base::Cancellable const & cancellable);
|
||||
void SetContext(MwmContext * context);
|
||||
void SetPostcodes(CBV const * postcodes);
|
||||
|
||||
template <typename Fn>
|
||||
void Match(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
|
||||
{
|
||||
if (child.m_type >= parent.m_type)
|
||||
return;
|
||||
switch (parent.m_type)
|
||||
{
|
||||
case Model::TYPE_SUBPOI:
|
||||
case Model::TYPE_VILLAGE:
|
||||
case Model::TYPE_STATE:
|
||||
case Model::TYPE_COUNTRY:
|
||||
case Model::TYPE_UNCLASSIFIED:
|
||||
case Model::TYPE_COUNT: ASSERT(false, ("Invalid parent layer type:", parent.m_type)); break;
|
||||
case Model::TYPE_CITY:
|
||||
ASSERT_EQUAL(child.m_type, Model::TYPE_BUILDING, ());
|
||||
MatchBuildingsWithPlace(child, parent, fn);
|
||||
break;
|
||||
case Model::TYPE_COMPLEX_POI:
|
||||
ASSERT_EQUAL(child.m_type, Model::TYPE_SUBPOI, ());
|
||||
MatchPOIsWithParent(child, parent, fn);
|
||||
break;
|
||||
case Model::TYPE_BUILDING:
|
||||
ASSERT(Model::IsPoi(child.m_type), ());
|
||||
MatchPOIsWithParent(child, parent, fn);
|
||||
break;
|
||||
case Model::TYPE_STREET:
|
||||
ASSERT(Model::IsPoiOrBuilding(child.m_type), ("Invalid child layer type:", child.m_type));
|
||||
if (Model::IsPoi(child.m_type))
|
||||
MatchPOIsWithStreets(child, parent, fn);
|
||||
else
|
||||
MatchBuildingsWithStreets(child, parent, fn);
|
||||
break;
|
||||
case Model::TYPE_SUBURB:
|
||||
ASSERT(child.m_type == Model::TYPE_STREET || child.m_type == Model::TYPE_BUILDING || Model::IsPoi(child.m_type),
|
||||
());
|
||||
// Avoid matching buildings to suburb without street.
|
||||
if (child.m_type == Model::TYPE_BUILDING)
|
||||
MatchBuildingsWithPlace(child, parent, fn);
|
||||
else
|
||||
MatchChildWithSuburbs(child, parent, fn);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void OnQueryFinished();
|
||||
|
||||
private:
|
||||
std::vector<uint32_t> const & GetPlaceAddrFeatures(uint32_t placeId, std::function<CBV()> const & fn);
|
||||
|
||||
void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); }
|
||||
|
||||
static bool HouseNumbersMatch(FeatureType & feature, std::vector<house_numbers::Token> const & queryParse)
|
||||
{
|
||||
ASSERT(!queryParse.empty(), ());
|
||||
|
||||
auto const interpol = ftypes::IsAddressInterpolChecker::Instance().GetInterpolType(feature);
|
||||
if (interpol != feature::InterpolType::None)
|
||||
return house_numbers::HouseNumbersMatchRange(feature.GetRef(), queryParse, interpol);
|
||||
|
||||
auto const uniHouse = strings::MakeUniString(feature.GetHouseNumber());
|
||||
if (uniHouse.empty())
|
||||
return false;
|
||||
|
||||
if (feature.GetID().IsEqualCountry({"Czech", "Slovakia"}))
|
||||
return house_numbers::HouseNumbersMatchConscription(uniHouse, queryParse);
|
||||
|
||||
return house_numbers::HouseNumbersMatch(uniHouse, queryParse);
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void MatchPOIsWithParent(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
|
||||
{
|
||||
double parentRadius = 0.0;
|
||||
// Following code initially loads centers of POIs and then, for
|
||||
// each building, tries to find all POIs located at distance less
|
||||
// than parentRadius.
|
||||
|
||||
if (parent.m_type == Model::TYPE_BUILDING)
|
||||
{
|
||||
ASSERT(Model::IsPoi(child.m_type), ());
|
||||
parentRadius = kBuildingRadiusMeters;
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT_EQUAL(parent.m_type, Model::TYPE_COMPLEX_POI, ());
|
||||
ASSERT_EQUAL(child.m_type, Model::TYPE_SUBPOI, ());
|
||||
parentRadius = kComplexPoiRadiusMeters;
|
||||
}
|
||||
|
||||
auto const & pois = *child.m_sortedFeatures;
|
||||
auto const & buildings = *parent.m_sortedFeatures;
|
||||
|
||||
BailIfCancelled();
|
||||
|
||||
std::vector<PointRectMatcher::PointIdPair> poiCenters;
|
||||
poiCenters.reserve(pois.size());
|
||||
|
||||
for (size_t i = 0; i < pois.size(); ++i)
|
||||
if (auto poiFt = GetByIndex(pois[i]))
|
||||
poiCenters.emplace_back(feature::GetCenter(*poiFt, FeatureType::WORST_GEOMETRY), i /* id */);
|
||||
|
||||
std::vector<PointRectMatcher::RectIdPair> buildingRects;
|
||||
buildingRects.reserve(buildings.size());
|
||||
auto maxRadius = parentRadius;
|
||||
for (size_t i = 0; i < buildings.size(); ++i)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
auto buildingFt = GetByIndex(buildings[i]);
|
||||
if (!buildingFt)
|
||||
continue;
|
||||
|
||||
if (buildingFt->GetGeomType() == feature::GeomType::Point)
|
||||
{
|
||||
auto const center = feature::GetCenter(*buildingFt, FeatureType::WORST_GEOMETRY);
|
||||
buildingRects.emplace_back(mercator::RectByCenterXYAndSizeInMeters(center, parentRadius), i /* id */);
|
||||
}
|
||||
else
|
||||
{
|
||||
buildingRects.emplace_back(buildingFt->GetLimitRect(FeatureType::WORST_GEOMETRY), i /* id */);
|
||||
double const rectSize = std::max(buildingRects.back().m_rect.SizeX(), buildingRects.back().m_rect.SizeY());
|
||||
maxRadius = std::max(maxRadius, rectSize / 2);
|
||||
}
|
||||
}
|
||||
|
||||
PointRectMatcher::Match(poiCenters, buildingRects, PointRectMatcher::RequestType::Any,
|
||||
[&](size_t poiId, size_t buildingId)
|
||||
{
|
||||
ASSERT_LESS(poiId, pois.size(), ());
|
||||
ASSERT_LESS(buildingId, buildings.size(), ());
|
||||
fn(pois[poiId], buildings[buildingId]);
|
||||
});
|
||||
|
||||
if (!parent.m_hasDelayedFeatures)
|
||||
return;
|
||||
|
||||
// |buildings| doesn't contain buildings matching by house number,
|
||||
// so following code reads buildings in POIs vicinities and checks
|
||||
// house numbers.
|
||||
std::vector<house_numbers::Token> queryParse;
|
||||
ParseQuery(parent.m_subQuery, parent.m_lastTokenIsPrefix, queryParse);
|
||||
if (queryParse.empty())
|
||||
return;
|
||||
|
||||
for (size_t i = 0; i < pois.size(); ++i)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
m_context->ForEachFeature(mercator::RectByCenterXYAndSizeInMeters(poiCenters[i].m_point, maxRadius),
|
||||
[&](FeatureType & ft)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
if (m_postcodes && !m_postcodes->HasBit(ft.GetID().m_index) && !m_postcodes->HasBit(GetMatchingStreet(ft)))
|
||||
return;
|
||||
if (HouseNumbersMatch(ft, queryParse))
|
||||
{
|
||||
double const distanceM = mercator::DistanceOnEarth(feature::GetCenter(ft), poiCenters[i].m_point);
|
||||
if (distanceM < maxRadius)
|
||||
fn(pois[i], ft.GetID().m_index);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void MatchPOIsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
ASSERT(Model::IsPoi(child.m_type), ());
|
||||
ASSERT_EQUAL(parent.m_type, Model::TYPE_STREET, ());
|
||||
|
||||
auto const & pois = *child.m_sortedFeatures;
|
||||
auto const & streets = *parent.m_sortedFeatures;
|
||||
|
||||
std::vector<PointRectMatcher::PointIdPair> poiCenters;
|
||||
poiCenters.reserve(pois.size());
|
||||
|
||||
for (size_t i = 0; i < pois.size(); ++i)
|
||||
if (auto poiFt = GetByIndex(pois[i]))
|
||||
poiCenters.emplace_back(feature::GetCenter(*poiFt, FeatureType::WORST_GEOMETRY), i /* id */);
|
||||
|
||||
std::vector<PointRectMatcher::RectIdPair> streetRects;
|
||||
streetRects.reserve(streets.size());
|
||||
|
||||
std::vector<ProjectionOnStreetCalculator> streetProjectors;
|
||||
streetProjectors.reserve(streets.size());
|
||||
|
||||
for (size_t i = 0; i < streets.size(); ++i)
|
||||
{
|
||||
auto streetFt = GetByIndex(streets[i]);
|
||||
if (!streetFt)
|
||||
continue;
|
||||
|
||||
streetFt->ParseGeometry(FeatureType::WORST_GEOMETRY);
|
||||
|
||||
m2::RectD inflationRect;
|
||||
// Any point is good enough here, and feature::GetCenter would re-read the geometry.
|
||||
if (streetFt->GetPointsCount() > 0)
|
||||
inflationRect = mercator::RectByCenterXYAndSizeInMeters(streetFt->GetPoint(0), 0.5 * kStreetRadiusMeters);
|
||||
|
||||
for (size_t j = 0; j + 1 < streetFt->GetPointsCount(); ++j)
|
||||
{
|
||||
auto const & p1 = streetFt->GetPoint(j);
|
||||
auto const & p2 = streetFt->GetPoint(j + 1);
|
||||
m2::RectD rect(p1, p2);
|
||||
rect.Inflate(inflationRect.SizeX(), inflationRect.SizeY());
|
||||
streetRects.emplace_back(rect, i /* id */);
|
||||
}
|
||||
|
||||
std::vector<m2::PointD> streetPoints;
|
||||
streetPoints.reserve(streetFt->GetPointsCount());
|
||||
for (size_t j = 0; j < streetFt->GetPointsCount(); ++j)
|
||||
streetPoints.emplace_back(streetFt->GetPoint(j));
|
||||
streetProjectors.emplace_back(streetPoints);
|
||||
}
|
||||
|
||||
BailIfCancelled();
|
||||
PointRectMatcher::Match(poiCenters, streetRects, PointRectMatcher::RequestType::All,
|
||||
[&](size_t poiId, size_t streetId)
|
||||
{
|
||||
ASSERT_LESS(poiId, pois.size(), ());
|
||||
ASSERT_LESS(streetId, streets.size(), ());
|
||||
auto const & poiCenter = poiCenters[poiId].m_point;
|
||||
ProjectionOnStreet proj;
|
||||
if (streetProjectors[streetId].GetProjection(poiCenter, proj) && proj.m_distMeters < kStreetRadiusMeters)
|
||||
fn(pois[poiId], streets[streetId]);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void MatchBuildingsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
|
||||
{
|
||||
ASSERT_EQUAL(child.m_type, Model::TYPE_BUILDING, ());
|
||||
ASSERT_EQUAL(parent.m_type, Model::TYPE_STREET, ());
|
||||
|
||||
auto const & buildings = *child.m_sortedFeatures;
|
||||
auto const & streets = *parent.m_sortedFeatures;
|
||||
|
||||
// When all buildings are in |buildings| and the number of
|
||||
// buildings is less than the number of streets, it's probably faster
|
||||
// to check nearby streets for each building instead of loading
|
||||
// street vicinities.
|
||||
if (!child.m_hasDelayedFeatures && buildings.size() < streets.size())
|
||||
{
|
||||
for (uint32_t const houseId : buildings)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
uint32_t const streetId = GetMatchingStreet({m_context->GetId(), houseId});
|
||||
if (std::binary_search(streets.begin(), streets.end(), streetId))
|
||||
fn(houseId, streetId);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<house_numbers::Token> queryParse;
|
||||
ParseQuery(child.m_subQuery, child.m_lastTokenIsPrefix, queryParse);
|
||||
|
||||
uint32_t numFilterInvocations = 0;
|
||||
auto const houseNumberFilter = [&](uint32_t houseId, uint32_t streetId)
|
||||
{
|
||||
++numFilterInvocations;
|
||||
if ((numFilterInvocations & 0xFF) == 0)
|
||||
BailIfCancelled();
|
||||
|
||||
if (std::binary_search(buildings.begin(), buildings.end(), houseId))
|
||||
return true;
|
||||
|
||||
if (!child.m_hasDelayedFeatures || queryParse.empty())
|
||||
return false;
|
||||
|
||||
if (m_postcodes && !m_postcodes->HasBit(houseId) && !m_postcodes->HasBit(streetId))
|
||||
return false;
|
||||
|
||||
std::unique_ptr<FeatureType> feature = GetByIndex(houseId);
|
||||
if (!feature)
|
||||
return false;
|
||||
|
||||
return HouseNumbersMatch(*feature, queryParse);
|
||||
};
|
||||
|
||||
// Cache is not needed since we process unique and mapped-only house->street.
|
||||
// std::unordered_map<uint32_t, bool> cache;
|
||||
// auto const cachingHouseNumberFilter = [&](uint32_t houseId, uint32_t streetId)
|
||||
// {
|
||||
// auto const res = cache.emplace(houseId, false);
|
||||
// if (res.second)
|
||||
// res.first->second = houseNumberFilter(houseId, streetId);
|
||||
// return res.first->second;
|
||||
// };
|
||||
|
||||
for (uint32_t streetId : streets)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
StreetVicinityLoader::Street const & street = m_loader.GetStreet(streetId);
|
||||
if (street.IsEmpty())
|
||||
continue;
|
||||
|
||||
for (uint32_t houseId : street.m_features)
|
||||
if (houseNumberFilter(houseId, streetId))
|
||||
fn(houseId, streetId);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void MatchBuildingsWithPlace(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
|
||||
{
|
||||
ASSERT_EQUAL(child.m_type, Model::TYPE_BUILDING, ());
|
||||
|
||||
auto const & buildings = *child.m_sortedFeatures;
|
||||
uint32_t const placeId = parent.m_sortedFeatures->front();
|
||||
auto const & ids = GetPlaceAddrFeatures(placeId, parent.m_getFeatures);
|
||||
|
||||
if (!buildings.empty())
|
||||
{
|
||||
for (uint32_t houseId : buildings)
|
||||
if (std::binary_search(ids.begin(), ids.end(), houseId))
|
||||
fn(houseId, placeId);
|
||||
}
|
||||
if (!child.m_hasDelayedFeatures)
|
||||
return;
|
||||
|
||||
std::vector<house_numbers::Token> queryParse;
|
||||
ParseQuery(child.m_subQuery, child.m_lastTokenIsPrefix, queryParse);
|
||||
if (queryParse.empty())
|
||||
return;
|
||||
|
||||
uint32_t numFilterInvocations = 0;
|
||||
auto const houseNumberFilter = [&](uint32_t houseId)
|
||||
{
|
||||
++numFilterInvocations;
|
||||
if ((numFilterInvocations & 0xFF) == 0)
|
||||
BailIfCancelled();
|
||||
|
||||
if (m_postcodes && !m_postcodes->HasBit(houseId))
|
||||
return false;
|
||||
|
||||
/// @todo Add house -> number cache for this and MatchBuildingsWithStreets?
|
||||
std::unique_ptr<FeatureType> feature = GetByIndex(houseId);
|
||||
if (!feature)
|
||||
return false;
|
||||
|
||||
return HouseNumbersMatch(*feature, queryParse);
|
||||
};
|
||||
|
||||
for (uint32_t houseId : ids)
|
||||
if (houseNumberFilter(houseId))
|
||||
fn(houseId, placeId);
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void MatchChildWithSuburbs(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
|
||||
{
|
||||
// Keep the old logic - a simple stub that matches all children. They will be filtered later in Geocoder.
|
||||
/// @todo Can intersect with parent.m_getFeatures here.
|
||||
uint32_t const suburbId = parent.m_sortedFeatures->front();
|
||||
for (uint32_t feature : *child.m_sortedFeatures)
|
||||
fn(feature, suburbId);
|
||||
}
|
||||
|
||||
// Returns id of a street feature corresponding to a |houseId|/|houseFeature|, or
|
||||
// kInvalidId if there is no such street.
|
||||
uint32_t GetMatchingStreet(FeatureID const & houseId);
|
||||
uint32_t GetMatchingStreet(FeatureType & houseFeature);
|
||||
template <class FeatureGetterT>
|
||||
uint32_t GetMatchingStreetImpl(FeatureID const & id, FeatureGetterT && getter);
|
||||
|
||||
using Street = ReverseGeocoder::Street;
|
||||
using Streets = std::vector<Street>;
|
||||
|
||||
Streets const & GetNearbyStreets(FeatureType & feature);
|
||||
|
||||
std::unique_ptr<FeatureType> GetByIndex(uint32_t id) const
|
||||
{
|
||||
/// @todo Add Cache for feature id -> (point, name / house number).
|
||||
auto res = m_context->GetFeature(id);
|
||||
|
||||
// This may happen for features deleted by the editor. We do not get them from EditableDataSource
|
||||
// but we still have ids of these features in the search index.
|
||||
if (!res)
|
||||
LOG(LWARNING, ("GetFeature() returned false.", id));
|
||||
return res;
|
||||
}
|
||||
|
||||
MwmContext * m_context;
|
||||
|
||||
CBV const * m_postcodes;
|
||||
|
||||
ReverseGeocoder m_reverseGeocoder;
|
||||
|
||||
// Cache of streets in a feature's vicinity. All lists in the cache
|
||||
// are ordered by distance from the corresponding feature.
|
||||
Cache<uint32_t, Streets> m_nearbyStreetsCache;
|
||||
|
||||
// Cache of correct streets for buildings. Current search algorithm
|
||||
// supports only one street for a building, whereas buildings can be
|
||||
// located on multiple streets.
|
||||
Cache<uint32_t, uint32_t> m_matchingStreetsCache;
|
||||
|
||||
// Cache of addresses that belong to a place (city/village).
|
||||
Cache<uint32_t, std::vector<uint32_t>> m_place2address;
|
||||
|
||||
StreetVicinityLoader m_loader;
|
||||
base::Cancellable const & m_cancellable;
|
||||
};
|
||||
} // namespace search
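The (numFilterInvocations & 0xFF) == 0 check in the house-number filters above throttles the cancellation test to one per 256 invocations, which keeps the per-feature loop cheap. A minimal standalone sketch of the same pattern, assuming a base::Cancellable reference named |cancellable| is available in the caller:

uint32_t invocations = 0;
auto const throttledBail = [&]()
{
  // Check for cancellation only on every 256th call; BailIfCancelled throws
  // CancelException once the search request has been cancelled.
  if ((++invocations & 0xFF) == 0)
    ::search::BailIfCancelled(cancellable);
};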
214 libs/search/features_layer_path_finder.cpp Normal file
@@ -0,0 +1,214 @@
#include "search/features_layer_path_finder.hpp"
|
||||
|
||||
#include "search/features_layer_matcher.hpp"
|
||||
#include "search/house_numbers_matcher.hpp"
|
||||
|
||||
#include "indexer/features_vector.hpp"
|
||||
|
||||
#include "base/stl_helpers.hpp"
|
||||
|
||||
#include <deque>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace search
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
// static
|
||||
FeaturesLayerPathFinder::Mode FeaturesLayerPathFinder::m_mode = MODE_AUTO;
|
||||
|
||||
namespace
|
||||
{
|
||||
using ParentGraph = deque<unordered_map<uint32_t, uint32_t>>;
|
||||
|
||||
// This function tries to estimate the amount of work needed to perform an
|
||||
// intersection pass on a sequence of layers.
|
||||
template <typename It>
|
||||
uint64_t CalcPassCost(It begin, It end)
|
||||
{
|
||||
uint64_t cost = 0;
|
||||
|
||||
if (begin == end)
|
||||
return cost;
|
||||
|
||||
uint64_t reachable = max((*begin)->m_sortedFeatures->size(), size_t(1));
|
||||
for (++begin; begin != end; ++begin)
|
||||
{
|
||||
uint64_t const layer = max((*begin)->m_sortedFeatures->size(), size_t(1));
|
||||
cost += layer * reachable;
|
||||
reachable = min(reachable, layer);
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
uint64_t CalcTopDownPassCost(vector<FeaturesLayer const *> const & layers)
|
||||
{
|
||||
return CalcPassCost(layers.rbegin(), layers.rend());
|
||||
}
|
||||
|
||||
uint64_t CalcBottomUpPassCost(vector<FeaturesLayer const *> const & layers)
|
||||
{
|
||||
return CalcPassCost(layers.begin(), layers.end());
|
||||
}
|
||||
|
||||
bool GetPath(uint32_t id, vector<FeaturesLayer const *> const & layers, ParentGraph const & parent,
|
||||
IntersectionResult & result)
|
||||
{
|
||||
result.Clear();
|
||||
if (layers.size() != parent.size() + 1)
|
||||
return false;
|
||||
|
||||
size_t level = 0;
|
||||
for (auto parentGraphLayer = parent.crbegin(); parentGraphLayer != parent.crend(); ++parentGraphLayer, ++level)
|
||||
{
|
||||
result.Set(layers[level]->m_type, id);
|
||||
auto const it = parentGraphLayer->find(id);
|
||||
if (it == parentGraphLayer->cend())
|
||||
return false;
|
||||
id = it->second;
|
||||
}
|
||||
result.Set(layers[level]->m_type, id);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MayHaveDelayedFeatures(FeaturesLayer const & layer)
|
||||
{
|
||||
return layer.m_type == Model::TYPE_BUILDING &&
|
||||
house_numbers::LooksLikeHouseNumber(layer.m_subQuery, layer.m_lastTokenIsPrefix);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <class FnT>
|
||||
void FeaturesLayerPathFinder::FindReachableVertices(FeaturesLayerMatcher & matcher,
|
||||
vector<FeaturesLayer const *> const & layers, FnT && fn)
|
||||
{
|
||||
switch (m_mode)
|
||||
{
|
||||
case MODE_AUTO:
|
||||
{
|
||||
uint64_t const topDownCost = CalcTopDownPassCost(layers);
|
||||
uint64_t const bottomUpCost = CalcBottomUpPassCost(layers);
|
||||
|
||||
if (bottomUpCost < topDownCost)
|
||||
FindReachableVerticesBottomUp(matcher, layers, fn);
|
||||
else
|
||||
FindReachableVerticesTopDown(matcher, layers, fn);
|
||||
}
|
||||
break;
|
||||
case MODE_BOTTOM_UP: FindReachableVerticesBottomUp(matcher, layers, fn); break;
|
||||
case MODE_TOP_DOWN: FindReachableVerticesTopDown(matcher, layers, fn); break;
|
||||
}
|
||||
}
|
||||
|
||||
template <class FnT>
|
||||
void FeaturesLayerPathFinder::FindReachableVerticesTopDown(FeaturesLayerMatcher & matcher,
|
||||
vector<FeaturesLayer const *> const & layers, FnT && fn)
|
||||
{
|
||||
ASSERT(!layers.empty(), ());
|
||||
|
||||
vector<uint32_t> reachable = *(layers.back()->m_sortedFeatures);
|
||||
vector<uint32_t> buffer;
|
||||
|
||||
ParentGraph parentGraph;
|
||||
|
||||
auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature)
|
||||
{
|
||||
auto & parent = parentGraph.back();
|
||||
if (parent.find(childFeature) != parent.end())
|
||||
return;
|
||||
parent[childFeature] = parentFeature;
|
||||
buffer.push_back(childFeature);
|
||||
};
|
||||
|
||||
for (size_t i = layers.size() - 1; i != 0; --i)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
parentGraph.emplace_back();
|
||||
FeaturesLayer parent(*layers[i]);
|
||||
if (i != layers.size() - 1)
|
||||
base::SortUnique(reachable);
|
||||
parent.m_sortedFeatures = &reachable;
|
||||
|
||||
// The first condition is an optimization: it is enough to extract
|
||||
// the delayed features only once.
|
||||
parent.m_hasDelayedFeatures = (i == layers.size() - 1 && MayHaveDelayedFeatures(parent));
|
||||
|
||||
FeaturesLayer child(*layers[i - 1]);
|
||||
child.m_hasDelayedFeatures = MayHaveDelayedFeatures(child);
|
||||
|
||||
buffer.clear();
|
||||
matcher.Match(child, parent, addEdge);
|
||||
reachable.swap(buffer);
|
||||
}
|
||||
|
||||
auto const & lowestLevel = reachable;
|
||||
|
||||
IntersectionResult result;
|
||||
for (auto const & id : lowestLevel)
|
||||
if (GetPath(id, layers, parentGraph, result))
|
||||
fn(result);
|
||||
}
|
||||
|
||||
template <class FnT>
|
||||
void FeaturesLayerPathFinder::FindReachableVerticesBottomUp(FeaturesLayerMatcher & matcher,
|
||||
vector<FeaturesLayer const *> const & layers, FnT && fn)
|
||||
{
|
||||
ASSERT(!layers.empty(), ());
|
||||
|
||||
vector<uint32_t> reachable = *(layers.front()->m_sortedFeatures);
|
||||
vector<uint32_t> buffer;
|
||||
|
||||
ParentGraph parentGraph;
|
||||
|
||||
// It is possible that there are delayed features on the lowest level.
|
||||
// We do not know about them until the matcher has been called, so
|
||||
// they will be added in |addEdge|. On the other hand, if there is
|
||||
// only one level, we must make sure that it is nonempty.
|
||||
// This problem does not arise in the top-down pass because there
|
||||
// the last reached level is exactly the lowest one.
|
||||
vector<uint32_t> lowestLevel = reachable;
|
||||
// True iff |addEdge| works with the lowest level.
|
||||
bool first = true;
|
||||
|
||||
auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature)
|
||||
{
|
||||
auto & parent = parentGraph.front();
|
||||
if (parent.find(childFeature) != parent.end())
|
||||
return;
|
||||
parent[childFeature] = parentFeature;
|
||||
buffer.push_back(parentFeature);
|
||||
|
||||
if (first)
|
||||
lowestLevel.push_back(childFeature);
|
||||
};
|
||||
|
||||
for (size_t i = 0; i + 1 != layers.size(); ++i)
|
||||
{
|
||||
BailIfCancelled();
|
||||
|
||||
parentGraph.emplace_front();
|
||||
FeaturesLayer child(*layers[i]);
|
||||
if (i != 0)
|
||||
base::SortUnique(reachable);
|
||||
child.m_sortedFeatures = &reachable;
|
||||
child.m_hasDelayedFeatures = (i == 0 && MayHaveDelayedFeatures(child));
|
||||
|
||||
FeaturesLayer parent(*layers[i + 1]);
|
||||
parent.m_hasDelayedFeatures = MayHaveDelayedFeatures(parent);
|
||||
|
||||
buffer.clear();
|
||||
matcher.Match(child, parent, addEdge);
|
||||
reachable.swap(buffer);
|
||||
|
||||
first = false;
|
||||
}
|
||||
|
||||
base::SortUnique(lowestLevel);
|
||||
|
||||
IntersectionResult result;
|
||||
for (auto const & id : lowestLevel)
|
||||
if (GetPath(id, layers, parentGraph, result))
|
||||
fn(result);
|
||||
}
|
||||
} // namespace search
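A worked example of the CalcPassCost heuristic above, with hypothetical layer sizes of 1000 POIs, 10 streets and 5 cities (lowest to highest layer):

  bottom-up pass: 10 * 1000 + 5 * min(1000, 10) = 10050
  top-down pass:  10 * 5 + 1000 * min(5, 10)    = 5050

Since the estimated top-down cost is lower, MODE_AUTO would run FindReachableVerticesTopDown for this query.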
108 libs/search/features_layer_path_finder.hpp Normal file
@@ -0,0 +1,108 @@
#pragma once
|
||||
|
||||
#include "search/cancel_exception.hpp"
|
||||
#include "search/features_layer.hpp"
|
||||
#include "search/intersection_result.hpp"
|
||||
|
||||
#ifdef DEBUG
|
||||
#include "base/logging.hpp"
|
||||
#include "base/timer.hpp"
|
||||
#endif // DEBUG
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
class FeaturesVector;
|
||||
class MwmValue;
|
||||
|
||||
namespace base
|
||||
{
|
||||
class Cancellable;
|
||||
}
|
||||
|
||||
namespace search
|
||||
{
|
||||
class FeaturesLayerMatcher;
|
||||
|
||||
// This class is able to find all paths through a layered graph, with
|
||||
// vertices as features, and edges as pairs of vertices satisfying
|
||||
// belongs-to relation. For more details on belongs-to relation see
|
||||
// documentation for FeaturesLayerMatcher.
|
||||
//
|
||||
// In short, this class is able to find all features matching to a
|
||||
// given interpretation of a search query.
|
||||
//
|
||||
// NOTE: this class is *NOT* thread-safe.
|
||||
class FeaturesLayerPathFinder
|
||||
{
|
||||
public:
|
||||
// An internal mode. The modes should produce similar results
|
||||
// and differ only in efficiency: a mode that is faster
|
||||
// for a search query may be slower for another.
|
||||
// Modes other than MODE_AUTO should be used only by the testing code.
|
||||
enum Mode
|
||||
{
|
||||
MODE_AUTO,
|
||||
MODE_TOP_DOWN,
|
||||
MODE_BOTTOM_UP
|
||||
};
|
||||
|
||||
FeaturesLayerPathFinder(base::Cancellable const & cancellable) : m_cancellable(cancellable) {}
|
||||
|
||||
template <typename TFn>
|
||||
void ForEachReachableVertex(FeaturesLayerMatcher & matcher, std::vector<FeaturesLayer const *> const & layers,
|
||||
TFn && fn)
|
||||
{
|
||||
if (layers.empty())
|
||||
return;
|
||||
|
||||
// TODO (@y): remove following code as soon as
|
||||
// FindReachableVertices() works fast enough for most cases
|
||||
// (significantly less than 1 second).
|
||||
#ifdef DEBUG
|
||||
for (auto const * layer : layers)
|
||||
LOG(LDEBUG, (DebugPrint(*layer)));
|
||||
|
||||
size_t count = 0;
|
||||
base::Timer timer;
|
||||
#endif // DEBUG
|
||||
|
||||
FindReachableVertices(matcher, layers, [&](IntersectionResult const & r)
|
||||
{
|
||||
fn(r);
|
||||
#ifdef DEBUG
|
||||
++count;
|
||||
#endif // DEBUG
|
||||
});
|
||||
|
||||
#ifdef DEBUG
|
||||
LOG(LDEBUG, ("Found:", count, "elapsed:", timer.ElapsedSeconds(), "seconds"));
|
||||
#endif // DEBUG
|
||||
}
|
||||
|
||||
static void SetModeForTesting(Mode mode) { m_mode = mode; }
|
||||
|
||||
private:
|
||||
void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); }
|
||||
|
||||
template <class FnT>
|
||||
void FindReachableVertices(FeaturesLayerMatcher & matcher, std::vector<FeaturesLayer const *> const & layers,
|
||||
FnT && fn);
|
||||
|
||||
// Tries to find all |reachable| features from the lowest layer in a
|
||||
// high level -> low level pass.
|
||||
template <class FnT>
|
||||
void FindReachableVerticesTopDown(FeaturesLayerMatcher & matcher, std::vector<FeaturesLayer const *> const & layers,
|
||||
FnT && fn);
|
||||
|
||||
// Tries to find all |reachable| features from the lowest layer in a
|
||||
// low level -> high level pass.
|
||||
template <class FnT>
|
||||
void FindReachableVerticesBottomUp(FeaturesLayerMatcher & matcher, std::vector<FeaturesLayer const *> const & layers,
|
||||
FnT && fn);
|
||||
|
||||
base::Cancellable const & m_cancellable;
|
||||
|
||||
static Mode m_mode;
|
||||
};
|
||||
} // namespace search
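A hedged usage sketch of the path finder declared above; the matcher, the cancellable and the layer objects (ordered from the lowest to the highest model type) are assumed to be prepared by the caller, e.g. by Geocoder, and EmitResult is a hypothetical consumer:

search::FeaturesLayerPathFinder finder(cancellable);
std::vector<search::FeaturesLayer const *> layers = {&poiLayer, &streetLayer, &cityLayer};
finder.ForEachReachableVertex(matcher, layers, [&](search::IntersectionResult const & result)
{
  // |result| holds one feature id per matched layer type of a complete path.
  EmitResult(result);
});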
28 libs/search/filtering_params.hpp Normal file
@@ -0,0 +1,28 @@
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace search
|
||||
{
|
||||
|
||||
// Performance/quality sensitive settings. They are recommended, but not mandatory.
|
||||
// Radius is in meters from one of the predefined pivots:
|
||||
// - viewport center
|
||||
// - user's position
|
||||
// - matched city center
|
||||
struct RecommendedFilteringParams
|
||||
{
|
||||
/// @name When reading and matching features "along" the street.
|
||||
/// @{
|
||||
// Streets search radius, can be ignored if streets count in area is less than m_maxStreetsCount.
|
||||
double m_streetSearchRadiusM = 80000;
|
||||
// Max number of street candidates. The street count can be greater if they are all inside the m_streetSearchRadiusM area.
|
||||
size_t m_maxStreetsCount = 100;
|
||||
|
||||
// Street cluster radius - an average logical group of streets in an average city.
|
||||
// If an exact match is not found in the cluster, we emit the cluster's streets as Relaxed results.
|
||||
double m_streetClusterRadiusMercator = 0.05; // ~5km
|
||||
/// @}
|
||||
};
|
||||
|
||||
} // namespace search
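A hypothetical override of these recommended defaults (the values below are illustrative only, not taken from the sources above):

search::RecommendedFilteringParams filtering;
filtering.m_streetSearchRadiusM = 20000;         // 20 km instead of the default 80 km
filtering.m_maxStreetsCount = 50;                // keep at most 50 street candidates
filtering.m_streetClusterRadiusMercator = 0.02;  // ~2 km street clusters
// The struct is then passed along via Geocoder::Params::m_filteringParams (see geocoder.hpp below).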
1949 libs/search/geocoder.cpp Normal file
File diff suppressed because it is too large
374 libs/search/geocoder.hpp Normal file
@@ -0,0 +1,374 @@
#pragma once
|
||||
|
||||
#include "search/cancel_exception.hpp"
|
||||
#include "search/categories_cache.hpp"
|
||||
#include "search/cbv.hpp"
|
||||
#include "search/cities_boundaries_table.hpp"
|
||||
#include "search/cuisine_filter.hpp"
|
||||
#include "search/feature_offset_match.hpp"
|
||||
#include "search/features_layer.hpp"
|
||||
#include "search/features_layer_path_finder.hpp"
|
||||
#include "search/filtering_params.hpp"
|
||||
#include "search/geocoder_context.hpp"
|
||||
#include "search/geocoder_locality.hpp"
|
||||
#include "search/geometry_cache.hpp"
|
||||
#include "search/mode.hpp"
|
||||
#include "search/model.hpp"
|
||||
#include "search/mwm_context.hpp"
|
||||
#include "search/postcode_points.hpp"
|
||||
#include "search/query_params.hpp"
|
||||
#include "search/streets_matcher.hpp"
|
||||
#include "search/token_range.hpp"
|
||||
#include "search/tracer.hpp"
|
||||
|
||||
#include "indexer/mwm_set.hpp"
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
#include "geometry/rect2d.hpp"
|
||||
|
||||
#include "base/cancellable.hpp"
|
||||
#include "base/dfa_helpers.hpp"
|
||||
#include "base/levenshtein_dfa.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
class CategoriesHolder;
|
||||
class DataSource;
|
||||
class MwmValue;
|
||||
|
||||
namespace storage
|
||||
{
|
||||
class CountryInfoGetter;
|
||||
} // namespace storage
|
||||
|
||||
namespace search
|
||||
{
|
||||
class FeaturesFilter;
|
||||
class FeaturesLayerMatcher;
|
||||
class PreRanker;
|
||||
class TokenSlice;
|
||||
|
||||
// This class is used to retrieve all features corresponding to a
|
||||
// search query. Search query is represented as a sequence of tokens
|
||||
// (including synonyms for these tokens), and Geocoder tries to build
|
||||
// all possible partitions (or layers) of the search query, where each
|
||||
// layer is a set of features corresponding to some search class
|
||||
// (e.g. POI, BUILDING, STREET, etc., see search_model.hpp).
|
||||
// Then, Geocoder builds a layered graph, with edges between features
|
||||
// on adjacent layers (e.g. between BUILDING and STREET, STREET and
|
||||
// CITY, etc.). Usually an edge between two features means that a
|
||||
// feature from the lowest layer geometrically belongs to a feature
|
||||
// from the highest layer (BUILDING is located on STREET, STREET is
|
||||
// located inside CITY, CITY is located inside STATE, etc.). Final
|
||||
// part is to find all paths through this layered graph and report all
|
||||
// features from the lowest layer, that are reachable from the
|
||||
// highest layer.
|
||||
class Geocoder
|
||||
{
|
||||
public:
|
||||
struct Params : public QueryParams
|
||||
{
|
||||
Mode m_mode = Mode::Everywhere;
|
||||
m2::RectD m_pivot;
|
||||
std::optional<m2::PointD> m_position;
|
||||
Locales m_categoryLocales;
|
||||
std::vector<uint32_t> m_cuisineTypes;
|
||||
std::vector<uint32_t> m_preferredTypes;
|
||||
std::shared_ptr<Tracer> m_tracer;
|
||||
|
||||
RecommendedFilteringParams m_filteringParams;
|
||||
|
||||
int m_scale = scales::GetUpperScale();
|
||||
|
||||
bool m_useDebugInfo = false; // Set to true for debug logs and tests.
|
||||
};
|
||||
|
||||
struct LocalitiesCaches
|
||||
{
|
||||
LocalitiesCaches(base::Cancellable const & cancellable);
|
||||
void Clear();
|
||||
|
||||
CountriesCache m_countries;
|
||||
StatesCache m_states;
|
||||
CitiesTownsOrVillagesCache m_citiesTownsOrVillages;
|
||||
VillagesCache m_villages;
|
||||
};
|
||||
|
||||
Geocoder(DataSource const & dataSource, storage::CountryInfoGetter const & infoGetter,
|
||||
CategoriesHolder const & categories, CitiesBoundariesTable const & citiesBoundaries, PreRanker & preRanker,
|
||||
LocalitiesCaches & localitiesCaches, base::Cancellable const & cancellable);
|
||||
~Geocoder();
|
||||
|
||||
// Sets search query params.
|
||||
void SetParams(Params const & params);
|
||||
|
||||
// Starts geocoding, retrieved features will be appended to
|
||||
// |results|.
|
||||
void GoEverywhere();
|
||||
void GoInViewport();
|
||||
|
||||
// Ends geocoding and informs the following stages
|
||||
// of the pipeline (PreRanker and further).
|
||||
// This method must be called from the previous stage
|
||||
// of the pipeline (the Processor).
|
||||
// If |cancelled| is true, the reason for calling Finish must
|
||||
// be the cancellation of processing the search request, otherwise
|
||||
// the reason must be the normal exit from GoEverywhere or GoInViewport.
|
||||
//
|
||||
// *NOTE* The caller assumes that a call to this method will never
|
||||
// result in search::CancelException even if the shutdown takes
|
||||
// noticeable time.
|
||||
void Finish(bool cancelled);
|
||||
|
||||
void CacheWorldLocalities();
|
||||
void ClearCaches();
|
||||
|
||||
private:
|
||||
enum class RectId
|
||||
{
|
||||
Pivot,
|
||||
Locality,
|
||||
Postcode,
|
||||
Suburb,
|
||||
Count
|
||||
};
|
||||
|
||||
using MwmInfoPtr = std::shared_ptr<MwmInfo>;
|
||||
struct ExtendedMwmInfos
|
||||
{
|
||||
struct ExtendedMwmInfo
|
||||
{
|
||||
bool operator<(ExtendedMwmInfo const & rhs) const { return m_score < rhs.m_score; }
|
||||
|
||||
MwmInfoPtr m_info;
|
||||
MwmContext::MwmType m_type;
|
||||
|
||||
// Score is a rect distance, with exceptions for World, viewport and the user's position.
|
||||
// Less score is better for search priority.
|
||||
double m_score;
|
||||
};
|
||||
|
||||
std::vector<ExtendedMwmInfo> m_infos;
|
||||
size_t m_firstBatchSize = 0;
|
||||
};
|
||||
|
||||
struct Postcodes
|
||||
{
|
||||
void Clear()
|
||||
{
|
||||
m_tokenRange.Clear();
|
||||
m_countryFeatures.Reset();
|
||||
m_worldFeatures.Reset();
|
||||
}
|
||||
|
||||
bool Has(uint32_t id, bool searchWorld = false) const
|
||||
{
|
||||
if (searchWorld)
|
||||
return m_worldFeatures.HasBit(id);
|
||||
return m_countryFeatures.HasBit(id);
|
||||
}
|
||||
|
||||
bool IsEmpty() const { return m_countryFeatures.IsEmpty() && m_worldFeatures.IsEmpty(); }
|
||||
|
||||
TokenRange m_tokenRange;
|
||||
CBV m_countryFeatures;
|
||||
CBV m_worldFeatures;
|
||||
};
|
||||
|
||||
// Sets search query params for categorial search.
|
||||
void SetParamsForCategorialSearch(Params const & params);
|
||||
|
||||
void GoImpl(std::vector<MwmInfoPtr> const & infos, bool inViewport);
|
||||
|
||||
template <typename Locality>
|
||||
using TokenToLocalities = std::map<TokenRange, std::vector<Locality>>;
|
||||
|
||||
QueryParams::Token const & GetTokens(size_t i) const;
|
||||
|
||||
// Creates a cache of posting lists corresponding to features in m_context
|
||||
// for each token and saves it to the context's m_features.
|
||||
void InitBaseContext(BaseContext & ctx);
|
||||
|
||||
void InitLayer(Model::Type type, TokenRange const & tokenRange, FeaturesLayer & layer);
|
||||
|
||||
void FillLocalityCandidates(BaseContext const & ctx, CBV const & filter, size_t const maxNumLocalities,
|
||||
std::vector<Locality> & preLocalities);
|
||||
|
||||
void FillLocalitiesTable(BaseContext const & ctx);
|
||||
|
||||
void FillVillageLocalities(BaseContext const & ctx);
|
||||
|
||||
bool CityHasPostcode(BaseContext const & ctx) const;
|
||||
|
||||
template <typename Fn>
|
||||
void ForEachCountry(ExtendedMwmInfos const & infos, Fn && fn);
|
||||
|
||||
// Throws CancelException if cancelled.
|
||||
void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); }
|
||||
|
||||
// A fast-path branch for categorial requests.
|
||||
void MatchCategories(BaseContext & ctx, bool aroundPivot);
|
||||
|
||||
// Tries to find all countries and states in a search query and then
|
||||
// performs matching of cities in found maps.
|
||||
void MatchRegions(BaseContext & ctx, Region::Type type);
|
||||
|
||||
// Tries to find all cities in a search query and then performs
|
||||
// matching of streets in found cities.
|
||||
void MatchCities(BaseContext & ctx);
|
||||
|
||||
// Tries to do geocoding without localities, i.e. find POIs,
|
||||
// BUILDINGs and STREETs without knowledge about country, state,
|
||||
// city or village. If during the geocoding too many features are
|
||||
// retrieved, viewport is used to throw away excess features.
|
||||
void MatchAroundPivot(BaseContext & ctx);
|
||||
|
||||
class CentersFilter
|
||||
{
|
||||
buffer_vector<m2::PointD, 4> m_centers;
|
||||
|
||||
public:
|
||||
void Add(m2::PointD const & pt) { m_centers.push_back(pt); }
|
||||
|
||||
template <class FnT>
|
||||
void ClusterizeStreets(std::vector<uint32_t> & streets, Geocoder const & geocoder, FnT && fn) const;
|
||||
};
|
||||
|
||||
// Tries to do geocoding in a limited scope, assuming that knowledge
|
||||
// about high-level features, like cities or countries, is
|
||||
// incorporated into |filter|.
|
||||
void LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter, CentersFilter const & centers);
|
||||
|
||||
template <typename Fn>
|
||||
void WithPostcodes(BaseContext & ctx, Fn && fn);
|
||||
|
||||
// Tries to match some adjacent tokens in the query as streets and
|
||||
// then performs geocoding in street vicinities.
|
||||
void GreedilyMatchStreets(BaseContext & ctx, CentersFilter const & centers);
|
||||
// Matches suburbs and streets inside suburbs like |GreedilyMatchStreets|.
|
||||
void GreedilyMatchStreetsWithSuburbs(BaseContext & ctx, CentersFilter const & centers);
|
||||
|
||||
void CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx, StreetsMatcher::Prediction const & prediction,
|
||||
CentersFilter const & centers, bool makeRelaxed);
|
||||
|
||||
void ProcessStreets(BaseContext & ctx, CentersFilter const & centers, CBV const & streets);
|
||||
|
||||
// Tries to find all paths in a search tree, where each edge is
|
||||
// marked with some substring of the query tokens. These paths are
|
||||
// called "layer sequence" and current path is stored in |m_layers|.
|
||||
void MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken, CBV const & filter);
|
||||
|
||||
// Returns true if current path in the search tree (see comment for
|
||||
// MatchPOIsAndBuildings()) looks sane. This method is used as a fast
|
||||
// pre-check to cut off unnecessary work.
|
||||
bool IsLayerSequenceSane(std::vector<FeaturesLayer> const & layers) const;
|
||||
|
||||
/// @returns kInvalidFeatureId if no match is found.
|
||||
uint32_t MatchWorld2Country(FeatureID const & id) const;
|
||||
// Finds all paths through layers and emits reachable features from the lowest layer.
|
||||
void FindPaths(BaseContext & ctx);
|
||||
|
||||
void TraceResult(Tracer & tracer, BaseContext const & ctx, MwmSet::MwmId const & mwmId, uint32_t ftId,
|
||||
Model::Type type, TokenRange const & tokenRange);
|
||||
|
||||
// Forms result and feeds it to |m_preRanker|.
|
||||
void EmitResult(BaseContext & ctx, FeatureID const & id, Model::Type type, TokenRange const & tokenRange,
|
||||
IntersectionResult const * geoParts, bool allTokensUsed, bool exactMatch);
|
||||
void EmitResult(BaseContext & ctx, Region const & region, TokenRange const & tokenRange, bool allTokensUsed,
|
||||
bool exactMatch);
|
||||
void EmitResult(BaseContext & ctx, City const & city, TokenRange const & tokenRange, bool allTokensUsed);
|
||||
|
||||
// Tries to match unclassified objects from lower layers, like
|
||||
// parks, forests, lakes, rivers, etc. This method finds all
|
||||
// UNCLASSIFIED objects that match to all currently unused tokens.
|
||||
void MatchUnclassified(BaseContext & ctx, size_t curToken);
|
||||
|
||||
// A wrapper around RetrievePostcodeFeatures.
|
||||
CBV RetrievePostcodeFeatures(MwmContext const & context, TokenSlice const & slice);
|
||||
|
||||
// A caching wrapper around Retrieval::RetrieveGeometryFeatures.
|
||||
CBV RetrieveGeometryFeatures(MwmContext const & context, m2::RectD const & rect, RectId id);
|
||||
|
||||
// This is a faster wrapper around SearchModel::GetSearchType(), as
|
||||
// it uses pre-loaded lists of streets and villages.
|
||||
[[nodiscard]] bool GetTypeInGeocoding(BaseContext const & ctx, uint32_t featureId, Model::Type & type);
|
||||
|
||||
// Reorders maps in a way that prefix consists of "best" maps to search and suffix consists of all
|
||||
// other maps ordered by minimum distance from pivot. Returns ExtendedMwmInfos structure which
|
||||
// consists of vector of mwms with MwmType information and number of "best" maps to search.
|
||||
// For viewport mode prefix consists of maps intersecting with pivot ordered by distance from
|
||||
// pivot center.
|
||||
// For non-viewport search mode prefix consists of maps intersecting with pivot, map with user
|
||||
// location and maps with cities matched to the query, sorting prefers mwms that contain the
|
||||
// user's position.
|
||||
ExtendedMwmInfos OrderCountries(bool inViewport, std::vector<MwmInfoPtr> const & infos);
|
||||
|
||||
DataSource const & m_dataSource;
|
||||
storage::CountryInfoGetter const & m_infoGetter;
|
||||
CategoriesHolder const & m_categories;
|
||||
|
||||
StreetsCache m_streetsCache;
|
||||
SuburbsCache m_suburbsCache;
|
||||
LocalitiesCaches & m_localitiesCaches;
|
||||
HotelsCache m_hotelsCache;
|
||||
FoodCache m_foodCache;
|
||||
cuisine_filter::CuisineFilter m_cuisineFilter;
|
||||
|
||||
base::Cancellable const & m_cancellable;
|
||||
|
||||
// Geocoder params.
|
||||
Params m_params;
|
||||
|
||||
// This field is used to map features to a limited number of search
|
||||
// classes.
|
||||
Model m_model;
|
||||
|
||||
// Following fields are set up by Search() method and can be
|
||||
// modified and used only from Search() or its callees.
|
||||
|
||||
MwmSet::MwmId m_worldId;
|
||||
|
||||
// Context of the currently processed mwm.
|
||||
std::unique_ptr<MwmContext> m_context;
|
||||
|
||||
// m_cities stores both big cities that are visible at World.mwm
|
||||
// and small villages and hamlets that are not.
|
||||
TokenToLocalities<City> m_cities;
|
||||
TokenToLocalities<Region> m_regions[Region::TYPE_COUNT];
|
||||
CitiesBoundariesTable const & m_citiesBoundaries;
|
||||
|
||||
// Caches of features in rects. These caches are separated from
|
||||
// TokenToLocalities because the latter are quite lightweight and not
|
||||
// all of them are needed.
|
||||
PivotRectsCache m_pivotRectsCache;
|
||||
PivotRectsCache m_postcodesRectsCache;
|
||||
PivotRectsCache m_suburbsRectsCache;
|
||||
LocalityRectsCache m_localityRectsCache;
|
||||
|
||||
PostcodePointsCache m_postcodePointsCache;
|
||||
|
||||
// Postcode features in the mwm that is currently being processed and in World.mwm.
|
||||
Postcodes m_postcodes;
|
||||
|
||||
// This filter is used to throw away excess features.
|
||||
FeaturesFilter const * m_filter;
|
||||
|
||||
// Features matcher for layers intersection.
|
||||
std::map<MwmSet::MwmId, std::unique_ptr<FeaturesLayerMatcher>> m_matchersCache;
|
||||
FeaturesLayerMatcher * m_matcher;
|
||||
|
||||
// Path finder for interpretations.
|
||||
FeaturesLayerPathFinder m_finder;
|
||||
|
||||
// Search query params prepared for retrieval.
|
||||
std::vector<SearchTrieRequest<strings::LevenshteinDFA>> m_tokenRequests;
|
||||
SearchTrieRequest<strings::PrefixDFAModifier<strings::LevenshteinDFA>> m_prefixTokenRequest;
|
||||
|
||||
ResultTracer m_resultTracer;
|
||||
|
||||
PreRanker & m_preRanker;
|
||||
};
|
||||
} // namespace search
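A hedged sketch of filling Geocoder::Params before SetParams(); the pivot coordinates and radius are made up, and |geocoder| is assumed to have been constructed with the dependencies listed above:

search::Geocoder::Params params;
params.m_mode = search::Mode::Everywhere;
params.m_pivot = mercator::RectByCenterXYAndSizeInMeters(mercator::FromLatLon(48.86, 2.35), 5000 /* meters */);
params.m_position = params.m_pivot.Center();
params.m_scale = scales::GetUpperScale();
// geocoder.SetParams(params);
// geocoder.GoEverywhere();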
111 libs/search/geocoder_context.cpp Normal file
@@ -0,0 +1,111 @@
#include "search/geocoder_context.hpp"
|
||||
|
||||
#include "search/token_range.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
// static
|
||||
BaseContext::TokenType BaseContext::FromModelType(Model::Type type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case Model::TYPE_SUBPOI: return TOKEN_TYPE_SUBPOI;
|
||||
case Model::TYPE_COMPLEX_POI: return TOKEN_TYPE_COMPLEX_POI;
|
||||
case Model::TYPE_BUILDING: return TOKEN_TYPE_BUILDING;
|
||||
case Model::TYPE_STREET: return TOKEN_TYPE_STREET;
|
||||
case Model::TYPE_SUBURB: return TOKEN_TYPE_SUBURB;
|
||||
case Model::TYPE_UNCLASSIFIED: return TOKEN_TYPE_UNCLASSIFIED;
|
||||
case Model::TYPE_VILLAGE: return TOKEN_TYPE_VILLAGE;
|
||||
case Model::TYPE_CITY: return TOKEN_TYPE_CITY;
|
||||
case Model::TYPE_STATE: return TOKEN_TYPE_STATE;
|
||||
case Model::TYPE_COUNTRY: return TOKEN_TYPE_COUNTRY;
|
||||
case Model::TYPE_COUNT: return TOKEN_TYPE_COUNT;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// static
|
||||
BaseContext::TokenType BaseContext::FromRegionType(Region::Type type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case Region::TYPE_STATE: return TOKEN_TYPE_STATE;
|
||||
case Region::TYPE_COUNTRY: return TOKEN_TYPE_COUNTRY;
|
||||
case Region::TYPE_COUNT: return TOKEN_TYPE_COUNT;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
size_t BaseContext::NumTokens() const
|
||||
{
|
||||
ASSERT_EQUAL(m_tokens.size(), m_features.size(), ());
|
||||
return m_tokens.size();
|
||||
}
|
||||
|
||||
size_t BaseContext::SkipUsedTokens(size_t curToken) const
|
||||
{
|
||||
while (curToken != m_tokens.size() && IsTokenUsed(curToken))
|
||||
++curToken;
|
||||
return curToken;
|
||||
}
|
||||
|
||||
bool BaseContext::IsTokenUsed(size_t token) const
|
||||
{
|
||||
ASSERT_LESS(token, m_tokens.size(), ());
|
||||
return m_tokens[token] != TOKEN_TYPE_COUNT;
|
||||
}
|
||||
|
||||
bool BaseContext::AllTokensUsed() const
|
||||
{
|
||||
for (size_t i = 0; i < m_tokens.size(); ++i)
|
||||
if (!IsTokenUsed(i))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BaseContext::HasUsedTokensInRange(TokenRange const & range) const
|
||||
{
|
||||
ASSERT(range.IsValid(), (range));
|
||||
for (size_t i = range.Begin(); i < range.End(); ++i)
|
||||
if (IsTokenUsed(i))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t BaseContext::NumUnusedTokenGroups() const
|
||||
{
|
||||
size_t numGroups = 0;
|
||||
for (size_t i = 0; i < m_tokens.size(); ++i)
|
||||
if (!IsTokenUsed(i) && (i == 0 || IsTokenUsed(i - 1)))
|
||||
++numGroups;
|
||||
return numGroups;
|
||||
}
|
||||
|
||||
std::string ToString(BaseContext::TokenType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case BaseContext::TOKEN_TYPE_SUBPOI: return "SUBPOI";
|
||||
case BaseContext::TOKEN_TYPE_COMPLEX_POI: return "COMPLEX_POI";
|
||||
case BaseContext::TOKEN_TYPE_BUILDING: return "BUILDING";
|
||||
case BaseContext::TOKEN_TYPE_STREET: return "STREET";
|
||||
case BaseContext::TOKEN_TYPE_SUBURB: return "SUBURB";
|
||||
case BaseContext::TOKEN_TYPE_UNCLASSIFIED: return "UNCLASSIFIED";
|
||||
case BaseContext::TOKEN_TYPE_VILLAGE: return "VILLAGE";
|
||||
case BaseContext::TOKEN_TYPE_CITY: return "CITY";
|
||||
case BaseContext::TOKEN_TYPE_STATE: return "STATE";
|
||||
case BaseContext::TOKEN_TYPE_COUNTRY: return "COUNTRY";
|
||||
case BaseContext::TOKEN_TYPE_POSTCODE: return "POSTCODE";
|
||||
case BaseContext::TOKEN_TYPE_COUNT: return "COUNT";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
std::string DebugPrint(BaseContext::TokenType type)
|
||||
{
|
||||
return ToString(type);
|
||||
}
|
||||
} // namespace search
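A minimal illustration of the token bookkeeping implemented above (BaseContext itself is declared in geocoder_context.hpp below); the token assignments are hypothetical:

search::BaseContext ctx;
ctx.m_tokens = {search::BaseContext::TOKEN_TYPE_STREET,    // token 0: matched as a street
                search::BaseContext::TOKEN_TYPE_BUILDING,  // token 1: matched as a building
                search::BaseContext::TOKEN_TYPE_COUNT};    // token 2: still unused
bool const allUsed = ctx.AllTokensUsed();          // false, token 2 is unused
size_t const next = ctx.SkipUsedTokens(0);         // 2, the index of the first unused token
size_t const groups = ctx.NumUnusedTokenGroups();  // 1, a single trailing unused group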
87 libs/search/geocoder_context.hpp Normal file
@@ -0,0 +1,87 @@
#pragma once
|
||||
|
||||
#include "search/cbv.hpp"
|
||||
#include "search/cuisine_filter.hpp"
|
||||
#include "search/features_layer.hpp"
|
||||
#include "search/geocoder_locality.hpp"
|
||||
#include "search/model.hpp"
|
||||
#include "search/retrieval.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace search
|
||||
{
|
||||
class FeaturesFilter;
|
||||
class TokenRange;
|
||||
|
||||
struct BaseContext
|
||||
{
|
||||
enum TokenType
|
||||
{
|
||||
TOKEN_TYPE_SUBPOI,
|
||||
TOKEN_TYPE_COMPLEX_POI,
|
||||
TOKEN_TYPE_BUILDING,
|
||||
TOKEN_TYPE_STREET,
|
||||
TOKEN_TYPE_SUBURB,
|
||||
TOKEN_TYPE_UNCLASSIFIED,
|
||||
TOKEN_TYPE_VILLAGE,
|
||||
TOKEN_TYPE_CITY,
|
||||
TOKEN_TYPE_STATE,
|
||||
TOKEN_TYPE_COUNTRY,
|
||||
TOKEN_TYPE_POSTCODE,
|
||||
|
||||
TOKEN_TYPE_COUNT
|
||||
};
|
||||
|
||||
static TokenType FromModelType(Model::Type type);
|
||||
static TokenType FromRegionType(Region::Type type);
|
||||
|
||||
size_t NumTokens() const;
|
||||
|
||||
// Advances |curToken| to the nearest unused token, or to the end of
|
||||
// |m_tokens| if there are no unused tokens.
|
||||
size_t SkipUsedTokens(size_t curToken) const;
|
||||
|
||||
// Returns true if |token| is marked as used.
|
||||
bool IsTokenUsed(size_t token) const;
|
||||
|
||||
// Returns true iff all tokens are used.
|
||||
bool AllTokensUsed() const;
|
||||
|
||||
// Returns true if there exists at least one used token in |range|.
|
||||
bool HasUsedTokensInRange(TokenRange const & range) const;
|
||||
|
||||
// Counts number of groups of consecutive unused tokens.
|
||||
size_t NumUnusedTokenGroups() const;
|
||||
|
||||
// List of bit-vectors of features, where i-th element of the list
|
||||
// corresponds to the i-th token in the search query.
|
||||
std::vector<Retrieval::ExtendedFeatures> m_features;
|
||||
CBV m_villages;
|
||||
CBV m_streets;
|
||||
CBV m_suburbs;
|
||||
|
||||
// Stack of layers filled during geocoding.
|
||||
std::vector<FeaturesLayer> m_layers;
|
||||
|
||||
// Stack of regions filled during geocoding.
|
||||
std::vector<Region const *> m_regions;
|
||||
|
||||
City const * m_city = nullptr;
|
||||
|
||||
// This vector is used to indicate what tokens were already matched
|
||||
// and can't be re-used during the geocoding process.
|
||||
std::vector<TokenType> m_tokens;
|
||||
|
||||
// The total number of results emitted using this
|
||||
// context in all branches of the search.
|
||||
size_t m_numEmitted = 0;
|
||||
|
||||
std::unique_ptr<cuisine_filter::CuisineFilter::ScopedFilter> m_cuisineFilter;
|
||||
};
|
||||
|
||||
std::string DebugPrint(BaseContext::TokenType type);
|
||||
std::string ToString(BaseContext::TokenType type);
|
||||
} // namespace search
|
||||
28
libs/search/geocoder_locality.cpp
Normal file
28
libs/search/geocoder_locality.cpp
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#include "search/geocoder_locality.hpp"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace search
|
||||
{
|
||||
// static
|
||||
Model::Type Region::ToModelType(Type type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case Region::TYPE_STATE: return Model::TYPE_STATE;
|
||||
case Region::TYPE_COUNTRY: return Model::TYPE_COUNTRY;
|
||||
case Region::TYPE_COUNT: return Model::TYPE_COUNT;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
std::string DebugPrint(Locality const & locality)
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << "Locality [ ";
|
||||
os << "m_featureId=" << DebugPrint(locality.m_featureId) << ", ";
|
||||
os << "m_tokenRange=" << DebugPrint(locality.m_tokenRange) << ", ";
|
||||
os << " ]";
|
||||
return os.str();
|
||||
}
|
||||
} // namespace search
|
||||
80
libs/search/geocoder_locality.hpp
Normal file
80
libs/search/geocoder_locality.hpp
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
#pragma once
|
||||
|
||||
#include "search/doc_vec.hpp"
|
||||
#include "search/model.hpp"
|
||||
#include "search/token_range.hpp"
|
||||
|
||||
#include "indexer/feature_decl.hpp"
|
||||
|
||||
#include "storage/country_info_getter.hpp"
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
#include "geometry/rect2d.hpp"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace search
|
||||
{
|
||||
class IdfMap;
|
||||
|
||||
struct Locality
|
||||
{
|
||||
Locality(FeatureID const & fID, TokenRange const & tokenRange, QueryVec const & queryVec, bool exactMatch)
|
||||
: m_featureId(fID)
|
||||
, m_tokenRange(tokenRange)
|
||||
, m_queryVec(queryVec)
|
||||
, m_exactMatch(exactMatch)
|
||||
{}
|
||||
|
||||
uint32_t GetFeatureIndex() const { return m_featureId.m_index; }
|
||||
|
||||
FeatureID m_featureId;
|
||||
TokenRange m_tokenRange;
|
||||
QueryVec m_queryVec;
|
||||
bool m_exactMatch;
|
||||
};
|
||||
|
||||
// This struct represents a country or a US or Canadian state. It
|
||||
// is used to filter maps before search.
|
||||
struct Region : public Locality
|
||||
{
|
||||
enum Type
|
||||
{
|
||||
TYPE_STATE,
|
||||
TYPE_COUNTRY,
|
||||
TYPE_COUNT
|
||||
};
|
||||
|
||||
Region(Locality && locality, Type type) : Locality(std::move(locality)), m_center(0, 0), m_type(type) {}
|
||||
|
||||
static Model::Type ToModelType(Type type);
|
||||
|
||||
storage::CountryInfoGetter::RegionIdVec m_ids;
|
||||
m2::PointD m_center;
|
||||
Type m_type;
|
||||
};
|
||||
|
||||
// This struct represents a city or a village. It is used to filter features
|
||||
// during search.
|
||||
// todo(@m) It works well as is, but consider a new naming scheme
|
||||
// when counties etc. are added. E.g., Region for countries and
|
||||
// states and Locality for smaller settlements.
|
||||
struct City : public Locality
|
||||
{
|
||||
City(Locality && locality, Model::Type type) : Locality(std::move(locality)), m_type(type) {}
|
||||
|
||||
m2::RectD m_rect;
|
||||
Model::Type m_type;
|
||||
};
|
||||
|
||||
struct Suburb
|
||||
{
|
||||
Suburb(FeatureID const & featureId, TokenRange const & tokenRange) : m_featureId(featureId), m_tokenRange(tokenRange)
|
||||
{}
|
||||
|
||||
FeatureID m_featureId;
|
||||
TokenRange m_tokenRange;
|
||||
};
|
||||
|
||||
std::string DebugPrint(Locality const & locality);
|
||||
} // namespace search
|
||||
68
libs/search/geometry_cache.cpp
Normal file
68
libs/search/geometry_cache.cpp
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
#include "search/geometry_cache.hpp"
|
||||
|
||||
#include "search/geometry_utils.hpp"
|
||||
#include "search/mwm_context.hpp"
|
||||
#include "search/retrieval.hpp"
|
||||
|
||||
#include "coding/point_coding.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
// GeometryCache -----------------------------------------------------------------------------------
|
||||
GeometryCache::GeometryCache(size_t maxNumEntries, base::Cancellable const & cancellable)
|
||||
: m_maxNumEntries(maxNumEntries)
|
||||
, m_cancellable(cancellable)
|
||||
{
|
||||
CHECK_GREATER(m_maxNumEntries, 0, ());
|
||||
}
|
||||
|
||||
void GeometryCache::InitEntry(MwmContext const & context, m2::RectD const & rect, int scale, Entry & entry)
|
||||
{
|
||||
Retrieval retrieval(context, m_cancellable);
|
||||
|
||||
entry.m_rect = rect;
|
||||
entry.m_cbv = retrieval.RetrieveGeometryFeatures(rect, scale);
|
||||
entry.m_scale = scale;
|
||||
}
|
||||
|
||||
// PivotRectsCache ---------------------------------------------------------------------------------
|
||||
PivotRectsCache::PivotRectsCache(size_t maxNumEntries, base::Cancellable const & cancellable, double maxRadiusMeters)
|
||||
: GeometryCache(maxNumEntries, cancellable)
|
||||
, m_maxRadiusMeters(maxRadiusMeters)
|
||||
{}
|
||||
|
||||
CBV PivotRectsCache::Get(MwmContext const & context, m2::RectD const & rect, int scale)
|
||||
{
|
||||
auto p = FindOrCreateEntry(context.GetId(), [&rect, &scale](Entry const & entry)
|
||||
{
|
||||
return scale == entry.m_scale &&
|
||||
(entry.m_rect.IsRectInside(rect) || IsEqualMercator(rect, entry.m_rect, kMwmPointAccuracy));
|
||||
});
|
||||
auto & entry = p.first;
|
||||
if (p.second)
|
||||
{
|
||||
m2::RectD normRect = mercator::RectByCenterXYAndSizeInMeters(rect.Center(), m_maxRadiusMeters);
|
||||
if (!normRect.IsRectInside(rect))
|
||||
normRect = rect;
|
||||
InitEntry(context, normRect, scale, entry);
|
||||
}
|
||||
return entry.m_cbv;
|
||||
}
|
||||
|
||||
// LocalityRectsCache ------------------------------------------------------------------------------
|
||||
LocalityRectsCache::LocalityRectsCache(size_t maxNumEntries, base::Cancellable const & cancellable)
|
||||
: GeometryCache(maxNumEntries, cancellable)
|
||||
{}
|
||||
|
||||
CBV LocalityRectsCache::Get(MwmContext const & context, m2::RectD const & rect, int scale)
|
||||
{
|
||||
auto p = FindOrCreateEntry(context.GetId(), [&rect, &scale](Entry const & entry)
|
||||
{ return scale == entry.m_scale && IsEqualMercator(rect, entry.m_rect, kMwmPointAccuracy); });
|
||||
auto & entry = p.first;
|
||||
if (p.second)
|
||||
InitEntry(context, rect, scale, entry);
|
||||
return entry.m_cbv;
|
||||
}
|
||||
} // namespace search
|
||||
101
libs/search/geometry_cache.hpp
Normal file
101
libs/search/geometry_cache.hpp
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
#pragma once
|
||||
|
||||
#include "search/cbv.hpp"
|
||||
|
||||
#include "indexer/mwm_set.hpp"
|
||||
|
||||
#include "geometry/rect2d.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
|
||||
namespace base
|
||||
{
|
||||
class Cancellable;
|
||||
}
|
||||
|
||||
namespace search
|
||||
{
|
||||
class MwmContext;
|
||||
|
||||
// This class represents a simple cache of features in rects for all mwms.
|
||||
//
|
||||
// *NOTE* This class is not thread-safe.
|
||||
class GeometryCache
|
||||
{
|
||||
public:
|
||||
virtual ~GeometryCache() = default;
|
||||
|
||||
// Returns (hopefully, cached) list of features in a given
|
||||
// rect. Note that the return value may be invalidated by subsequent calls to
|
||||
// this method.
|
||||
virtual CBV Get(MwmContext const & context, m2::RectD const & rect, int scale) = 0;
|
||||
|
||||
inline void Clear() { m_entries.clear(); }
|
||||
|
||||
protected:
|
||||
struct Entry
|
||||
{
|
||||
m2::RectD m_rect;
|
||||
CBV m_cbv;
|
||||
int m_scale = 0;
|
||||
};
|
||||
|
||||
// |maxNumEntries| denotes the maximum number of rectangles that
|
||||
// will be cached for each mwm individually.
|
||||
GeometryCache(size_t maxNumEntries, base::Cancellable const & cancellable);
|
||||
|
||||
template <typename Pred>
|
||||
std::pair<Entry &, bool> FindOrCreateEntry(MwmSet::MwmId const & id, Pred && pred)
|
||||
{
|
||||
auto & entries = m_entries[id];
|
||||
auto it = std::find_if(entries.begin(), entries.end(), std::forward<Pred>(pred));
|
||||
if (it != entries.end())
|
||||
{
|
||||
if (it != entries.begin())
|
||||
iter_swap(entries.begin(), it);
|
||||
return std::pair<Entry &, bool>(entries.front(), false);
|
||||
}
|
||||
|
||||
entries.emplace_front();
|
||||
if (entries.size() == m_maxNumEntries + 1)
|
||||
entries.pop_back();
|
||||
|
||||
ASSERT_LESS_OR_EQUAL(entries.size(), m_maxNumEntries, ());
|
||||
ASSERT(!entries.empty(), ());
|
||||
return std::pair<Entry &, bool>(entries.front(), true);
|
||||
}
|
||||
|
||||
void InitEntry(MwmContext const & context, m2::RectD const & rect, int scale, Entry & entry);
|
||||
|
||||
std::map<MwmSet::MwmId, std::deque<Entry>> m_entries;
|
||||
size_t const m_maxNumEntries;
|
||||
base::Cancellable const & m_cancellable;
|
||||
};
|
||||
|
||||
class PivotRectsCache : public GeometryCache
|
||||
{
|
||||
public:
|
||||
PivotRectsCache(size_t maxNumEntries, base::Cancellable const & cancellable, double maxRadiusMeters);
|
||||
|
||||
// GeometryCache overrides:
|
||||
CBV Get(MwmContext const & context, m2::RectD const & rect, int scale) override;
|
||||
|
||||
private:
|
||||
double const m_maxRadiusMeters;
|
||||
};
|
||||
|
||||
class LocalityRectsCache : public GeometryCache
|
||||
{
|
||||
public:
|
||||
LocalityRectsCache(size_t maxNumEntries, base::Cancellable const & cancellable);
|
||||
|
||||
// GeometryCache overrides:
|
||||
CBV Get(MwmContext const & context, m2::RectD const & rect, int scale) override;
|
||||
};
|
||||
} // namespace search
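A minimal usage sketch of the pivot cache declared above, assuming |context| (an MwmContext), |cancellable| and |rect| exist in the caller:

search::PivotRectsCache cache(4 /* maxNumEntries */, cancellable, 5000.0 /* maxRadiusMeters */);
search::CBV const first = cache.Get(context, rect, scales::GetUpperScale());
// The same rect and scale now hit the per-mwm entry created above instead of
// re-running Retrieval::RetrieveGeometryFeatures.
search::CBV const second = cache.Get(context, rect, scales::GetUpperScale());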
|
||||
18
libs/search/geometry_utils.cpp
Normal file
18
libs/search/geometry_utils.cpp
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
#include "search/geometry_utils.hpp"
|
||||
|
||||
#include "indexer/scales.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
double PointDistance(m2::PointD const & a, m2::PointD const & b)
|
||||
{
|
||||
return mercator::DistanceOnEarth(a, b);
|
||||
}
|
||||
|
||||
bool IsEqualMercator(m2::RectD const & r1, m2::RectD const & r2, double eps)
|
||||
{
|
||||
return m2::IsEqual(r1, r2, eps, eps);
|
||||
}
|
||||
} // namespace search
|
||||
14
libs/search/geometry_utils.hpp
Normal file
14
libs/search/geometry_utils.hpp
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
#pragma once
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
#include "geometry/rect2d.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
// Distance between 2 mercator points in meters.
|
||||
double PointDistance(m2::PointD const & a, m2::PointD const & b);
|
||||
|
||||
// Tests whether two rects given in the mercator projection are
|
||||
// equal with the absolute precision |eps|.
|
||||
bool IsEqualMercator(m2::RectD const & r1, m2::RectD const & r2, double eps);
|
||||
} // namespace search
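A small illustration of the two helpers above; the coordinates are made up and kMwmPointAccuracy comes from coding/point_coding.hpp:

m2::PointD const a = mercator::FromLatLon(52.52, 13.40);
m2::PointD const b = mercator::FromLatLon(52.53, 13.41);
double const distanceM = search::PointDistance(a, b);  // distance in meters between the two points
m2::RectD const r(a, b);
bool const same = search::IsEqualMercator(r, r, kMwmPointAccuracy /* eps */);  // trivially true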
|
||||
68
libs/search/highlighting.cpp
Normal file
68
libs/search/highlighting.cpp
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
#include "search/highlighting.hpp"
|
||||
#include "std/target_os.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Makes a continuous range over the tokens and the prefix.
|
||||
template <class Iter, class Value>
|
||||
class CombinedIterator
|
||||
{
|
||||
Iter m_cur;
|
||||
Iter m_end;
|
||||
Value const * m_val;
|
||||
|
||||
public:
|
||||
CombinedIterator(Iter cur, Iter end, Value const * val) : m_cur(cur), m_end(end), m_val(val) {}
|
||||
|
||||
Value const & operator*() const
|
||||
{
|
||||
ASSERT(m_val != nullptr || m_cur != m_end, ("dereferencing of an empty iterator"));
|
||||
if (m_cur != m_end)
|
||||
return *m_cur;
|
||||
|
||||
return *m_val;
|
||||
}
|
||||
|
||||
CombinedIterator & operator++()
|
||||
{
|
||||
if (m_cur != m_end)
|
||||
++m_cur;
|
||||
else
|
||||
m_val = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator==(CombinedIterator const & other) const { return m_val == other.m_val && m_cur == other.m_cur; }
|
||||
|
||||
bool operator!=(CombinedIterator const & other) const { return !(*this == other); }
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void HighlightResult(QueryTokens const & tokens, strings::UniString const & prefix, Result & res)
|
||||
{
|
||||
using Iter = QueryTokens::const_iterator;
|
||||
using CombinedIter = CombinedIterator<Iter, strings::UniString>;
|
||||
|
||||
CombinedIter beg(tokens.begin(), tokens.end(), prefix.empty() ? nullptr : &prefix);
|
||||
CombinedIter end(tokens.end() /* cur */, tokens.end() /* end */, nullptr);
|
||||
|
||||
// Highlight Title (potentially including branch)
|
||||
std::string titleForHighlighting = res.GetString();
|
||||
#if defined(OMIM_OS_IPHONE)
|
||||
std::string const & branch = res.GetBranch();
|
||||
|
||||
// On iOS we append branch text to the title for highlighting if it's not already present.
|
||||
if (!branch.empty() && titleForHighlighting.find(branch) == std::string::npos)
|
||||
titleForHighlighting += " " + branch;
|
||||
#endif
|
||||
|
||||
SearchStringTokensIntersectionRanges(
|
||||
titleForHighlighting, beg, end, [&](std::pair<uint16_t, uint16_t> const & range) { res.AddHighlightRange(range); });
|
||||
|
||||
// Highlight description.
|
||||
SearchStringTokensIntersectionRanges(res.GetAddress(), beg, end, [&](std::pair<uint16_t, uint16_t> const & range)
|
||||
{ res.AddDescHighlightRange(range); });
|
||||
}
|
||||
} // namespace search
|
||||
62
libs/search/highlighting.hpp
Normal file
62
libs/search/highlighting.hpp
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
#pragma once
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
|
||||
#include "search/common.hpp"
|
||||
#include "search/result.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace search
|
||||
{
|
||||
template <typename LowTokensIter, typename F>
|
||||
void SearchStringTokensIntersectionRanges(std::string const & s, LowTokensIter itLowBeg, LowTokensIter itLowEnd, F && f)
|
||||
{
|
||||
// Split the input query into tokens and a prefix.
|
||||
search::Delimiters delimsTest;
|
||||
size_t pos = 0;
|
||||
|
||||
strings::UniString const str = strings::MakeUniString(s);
|
||||
size_t const strLen = str.size();
|
||||
while (pos < strLen)
|
||||
{
|
||||
// skip delimiters
|
||||
while (pos < strLen && delimsTest(str[pos]))
|
||||
++pos;
|
||||
|
||||
size_t const beg = pos;
|
||||
|
||||
// find token
|
||||
while (pos < strLen && !delimsTest(str[pos]))
|
||||
++pos;
|
||||
|
||||
strings::UniString subStr;
|
||||
subStr.assign(str.begin() + beg, str.begin() + pos);
|
||||
size_t maxCount = 0;
|
||||
std::pair<uint16_t, uint16_t> result(0, 0);
|
||||
|
||||
for (auto itLow = itLowBeg; itLow != itLowEnd; ++itLow)
|
||||
{
|
||||
size_t const cnt = strings::CountNormLowerSymbols(subStr, *itLow);
|
||||
|
||||
if (cnt > maxCount)
|
||||
{
|
||||
maxCount = cnt;
|
||||
result.first = beg;
|
||||
result.second = cnt;
|
||||
}
|
||||
}
|
||||
|
||||
if (result.second != 0)
|
||||
f(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Adds to |res| the ranges that match the query tokens and, therefore, should be highlighted.
|
||||
// The query is passed in |tokens| and |prefix|.
|
||||
void HighlightResult(QueryTokens const & tokens, strings::UniString const & prefix, Result & res);
|
||||
} // namespace search
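A minimal sketch of the tokens-intersection helper above; the query tokens are hypothetical and assumed to be already lower-cased:

std::vector<strings::UniString> const lowTokens = {strings::MakeUniString("central"), strings::MakeUniString("park")};
std::vector<std::pair<uint16_t, uint16_t>> ranges;
search::SearchStringTokensIntersectionRanges("Central Park West", lowTokens.begin(), lowTokens.end(),
                                             [&](std::pair<uint16_t, uint16_t> const & range) { ranges.push_back(range); });
// |ranges| now holds (offset, length) pairs covering "Central" and "Park" in the input string.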
|
||||
1225
libs/search/house_detector.cpp
Normal file
1225
libs/search/house_detector.cpp
Normal file
File diff suppressed because it is too large
Load diff
250
libs/search/house_detector.hpp
Normal file
250
libs/search/house_detector.hpp
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
#pragma once
|
||||
|
||||
#include "search/feature_loader.hpp"
|
||||
#include "search/projection_on_street.hpp"
|
||||
|
||||
#include "indexer/feature_decl.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
class DataSource;
|
||||
|
||||
namespace search
|
||||
{
|
||||
struct ParsedNumber
|
||||
{
|
||||
public:
|
||||
/// @todo Pass correct "American" notation flag.
|
||||
ParsedNumber(std::string const & number, bool american = false);
|
||||
|
||||
std::string const & GetNumber() const { return m_fullN; }
|
||||
bool IsOdd() const { return (m_startN % 2 == 1); }
|
||||
int GetIntNumber() const { return m_startN; }
|
||||
|
||||
bool IsIntersect(ParsedNumber const & number, int offset = 0) const;
|
||||
|
||||
private:
|
||||
std::string m_fullN;
|
||||
int m_startN;
|
||||
int m_endN;
|
||||
};
|
||||
|
||||
class House
|
||||
{
|
||||
public:
|
||||
House(std::string const & number, m2::PointD const & point) : m_number(number), m_point(point) {}
|
||||
|
||||
std::string const & GetNumber() const { return m_number.GetNumber(); }
|
||||
int GetIntNumber() const { return m_number.GetIntNumber(); }
|
||||
m2::PointD const & GetPosition() const { return m_point; }
|
||||
|
||||
/// @return \n
|
||||
/// -1 - no match;
|
||||
/// 0 - full match;
|
||||
/// 1 - integer number match with odd (even).
|
||||
/// 2 - integer number match.
|
||||
int GetMatch(ParsedNumber const & number) const;
|
||||
bool GetNearbyMatch(ParsedNumber const & number) const;
|
||||
|
||||
private:
|
||||
ParsedNumber m_number;
|
||||
m2::PointD m_point;
|
||||
};
|
||||
|
||||
// NOTE: DO NOT DELETE instances of this class by a pointer/reference
|
||||
// to ProjectionOnStreet, because both classes have non-virtual destructors.
|
||||
struct HouseProjection : public ProjectionOnStreet
|
||||
{
|
||||
struct LessDistance
|
||||
{
|
||||
bool operator()(HouseProjection const * p1, HouseProjection const * p2) const
|
||||
{
|
||||
return p1->m_distMeters < p2->m_distMeters;
|
||||
}
|
||||
};
|
||||
|
||||
class EqualHouse
|
||||
{
|
||||
public:
|
||||
explicit EqualHouse(House const * h) : m_house(h) {}
|
||||
bool operator()(HouseProjection const * p) const { return m_house == p->m_house; }
|
||||
|
||||
private:
|
||||
House const * m_house;
|
||||
};
|
||||
|
||||
bool IsOdd() const { return (m_house->GetIntNumber() % 2 == 1); }
|
||||
|
||||
House const * m_house;
|
||||
|
||||
/// Distance in mercator, from street beginning to projection on street
|
||||
double m_streetDistance;
|
||||
};
|
||||
|
||||
// Many features are combined into one street.
|
||||
class Street
|
||||
{
|
||||
public:
|
||||
Street() : m_length(0.0), m_number(-1), m_housesRead(false) {}
|
||||
|
||||
void Reverse();
|
||||
void SortHousesProjection();
|
||||
|
||||
/// Get limit rect for street with ortho offset to the left and right.
|
||||
m2::RectD GetLimitRect(double offsetMeters) const;
|
||||
|
||||
double GetLength() const;
|
||||
|
||||
double GetPrefixLength(size_t numSegs) const;
|
||||
|
||||
static bool IsSameStreets(Street const * s1, Street const * s2) { return s1->m_processedName == s2->m_processedName; }
|
||||
|
||||
void SetName(std::string_view name);
|
||||
std::string const & GetDbgName() const { return m_processedName; }
|
||||
std::string const & GetName() const { return m_name; }
|
||||
|
||||
std::vector<m2::PointD> m_points;
|
||||
std::vector<HouseProjection> m_houses;
|
||||
double m_length; /// Length in mercator
|
||||
int m_number; /// Some ordered number after merging
|
||||
bool m_housesRead;
|
||||
|
||||
private:
|
||||
std::string m_name;
|
||||
std::string m_processedName;
|
||||
};
|
||||
|
||||
class MergedStreet
|
||||
{
|
||||
public:
|
||||
struct Index
|
||||
{
|
||||
size_t s, h;
|
||||
Index() : s(0), h(0) {}
|
||||
};
|
||||
|
||||
struct GreaterLength
|
||||
{
|
||||
bool operator()(MergedStreet const & s1, MergedStreet const & s2) const { return (s1.m_length > s2.m_length); }
|
||||
};
|
||||
|
||||
MergedStreet() : m_length(0.0) {}
|
||||
|
||||
std::string const & GetDbgName() const;
|
||||
std::string const & GetName() const;
|
||||
bool IsHousesRead() const;
|
||||
void FinishReadingHouses();
|
||||
|
||||
HouseProjection const * GetHousePivot(bool isOdd, bool & sign) const;
|
||||
|
||||
void Swap(MergedStreet & s)
|
||||
{
|
||||
m_cont.swap(s.m_cont);
|
||||
std::swap(m_length, s.m_length);
|
||||
}
|
||||
|
||||
Index Begin() const
|
||||
{
|
||||
Index i;
|
||||
Next(i);
|
||||
return i;
|
||||
}
|
||||
|
||||
void Inc(Index & i) const
|
||||
{
|
||||
++i.h;
|
||||
Next(i);
|
||||
}
|
||||
|
||||
bool IsEnd(Index const & i) const { return i.s == m_cont.size(); }
|
||||
|
||||
HouseProjection const & Get(Index const & i) const
|
||||
{
|
||||
ASSERT(!IsEnd(i), ());
|
||||
return m_cont[i.s]->m_houses[i.h];
|
||||
}
|
||||
|
||||
std::deque<Street *> m_cont;
|
||||
|
||||
private:
|
||||
void Erase(Index & i);
|
||||
void Next(Index & i) const;
|
||||
|
||||
double m_length;
|
||||
};
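// Illustrative sketch (not part of the original header): walking all house projections of a
// merged street with the Index cursor above. The helper name is hypothetical.
inline void ExampleForEachHouseProjection(MergedStreet const & street)
{
  for (MergedStreet::Index i = street.Begin(); !street.IsEnd(i); street.Inc(i))
  {
    HouseProjection const & proj = street.Get(i);
    // proj.m_distMeters (inherited from ProjectionOnStreet) holds the projection distance in meters.
    (void)proj;
  }
}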
|
||||
|
||||
struct HouseResult
|
||||
{
|
||||
HouseResult(House const * house, MergedStreet const * street) : m_house(house), m_street(street) {}
|
||||
|
||||
bool operator<(HouseResult const & a) const { return m_house < a.m_house; }
|
||||
bool operator==(HouseResult const & a) const { return m_house == a.m_house; }
|
||||
|
||||
m2::PointD const & GetOrg() const { return m_house->GetPosition(); }
|
||||
|
||||
House const * m_house;
|
||||
MergedStreet const * m_street;
|
||||
};
|
||||
|
||||
class HouseDetector
|
||||
{
|
||||
public:
|
||||
using StreetMap = std::map<FeatureID, Street *>;
|
||||
using HouseMap = std::map<FeatureID, House *>;
|
||||
using StreetPtr = std::pair<Street *, bool>;
|
||||
|
||||
static int const DEFAULT_OFFSET_M = 200;
|
||||
|
||||
explicit HouseDetector(DataSource const & dataSource);
|
||||
~HouseDetector();
|
||||
|
||||
int LoadStreets(std::vector<FeatureID> const & ids);
|
||||
/// @return number of different joined streets.
|
||||
int MergeStreets();
|
||||
|
||||
void ReadAllHouses(double offsetMeters = DEFAULT_OFFSET_M);
|
||||
|
||||
void GetHouseForName(std::string const & houseNumber, std::vector<HouseResult> & res);
|
||||
|
||||
void ClearCaches();
|
||||
void ClearUnusedStreets(std::vector<FeatureID> const & ids);
|
||||
|
||||
private:
|
||||
StreetPtr FindConnection(Street const * st, bool beg) const;
|
||||
|
||||
void MergeStreets(Street * st);
|
||||
|
||||
template <typename ProjectionCalculator>
|
||||
void ReadHouse(FeatureType & f, Street * st, ProjectionCalculator & calc);
|
||||
|
||||
void ReadHouses(Street * st);
|
||||
|
||||
void SetMetersToMercator(double factor);
|
||||
|
||||
double GetApprLengthMeters(int index) const;
|
||||
|
||||
FeatureLoader m_loader;
|
||||
|
||||
StreetMap m_id2st;
|
||||
HouseMap m_id2house;
|
||||
|
||||
std::vector<std::pair<m2::PointD, Street *>> m_end2st;
|
||||
std::vector<MergedStreet> m_streets;
|
||||
|
||||
double m_metersToMercator = 0.0;
|
||||
int m_streetNum = 0;
|
||||
double m_houseOffsetM = 0.0;
|
||||
};
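// Illustrative sketch (not part of the original header): the expected call sequence when resolving
// a house number on a set of street features. |dataSource|, |streetIds| and |houseNumber| are
// assumed to come from the caller; the helper name is hypothetical.
inline std::vector<HouseResult> ExampleDetectHouses(DataSource const & dataSource,
                                                    std::vector<FeatureID> const & streetIds,
                                                    std::string const & houseNumber)
{
  HouseDetector detector(dataSource);
  detector.LoadStreets(streetIds);
  detector.MergeStreets();
  detector.ReadAllHouses();

  std::vector<HouseResult> results;
  detector.GetHouseForName(houseNumber, results);
  return results;
}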
|
||||
|
||||
std::string DebugPrint(HouseProjection const & p);
|
||||
std::string DebugPrint(HouseResult const & r);
|
||||
} // namespace search
|
||||
637
libs/search/house_numbers_matcher.cpp
Normal file
637
libs/search/house_numbers_matcher.cpp
Normal file
|
|
@@ -0,0 +1,637 @@
|
|||
#include "search/house_numbers_matcher.hpp"
|
||||
|
||||
#include "indexer/string_set.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/iterator/transform_iterator.hpp>
|
||||
|
||||
using boost::make_transform_iterator;
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace house_numbers
|
||||
{
|
||||
using namespace std;
|
||||
using namespace strings;
|
||||
|
||||
namespace
|
||||
{
|
||||
// Common strings in house numbers.
|
||||
// To get this list, just run:
|
||||
//
|
||||
// ./clusterize-tag-values.lisp house-number-strings path-to-taginfo-db.db > strings.txt
|
||||
// cat strings.txt |
|
||||
// awk '{ if ($1 >= 100 && length($3) != 0) { printf("\"%s\",\n", $3) } }' |
|
||||
// sort | uniq
|
||||
//
|
||||
// *NOTE* there is a list of exceptions at the end.
|
||||
|
||||
/// @todo By VNG: This list looks hilarious :) We should definitely set some lower bound on the count
|
||||
/// to filter very exotic entries in addr:housenumber.
|
||||
|
||||
// Street keywords and ALL one-letter strings are removed for now. This is sensitive for search speed, because:
|
||||
// LooksLikeHouseNumber -> MatchBuildingsWithStreets -> *heavy* StreetVicinityLoader::GetStreet
|
||||
// "av", "avenida",
|
||||
// "ca", "cal", "calle", "carrera", "court",
|
||||
// "da", "de", "di".
|
||||
// "ga",
|
||||
// "ł", "la",
|
||||
// "ne",
|
||||
// "pa", "par", "park", "plaza",
|
||||
// "rd", "ro", "road",
|
||||
// "so", "south", "st", "street",
|
||||
// "vi",
|
||||
// "way", "we", "west",
|
||||
|
||||
char const * g_strings[] = {
|
||||
"aa", "ab", "abc", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj", "ak", "al", "am", "an", "ao", "ap", "aq", "ar",
|
||||
"are", "as", "at", "au", "aw", "ax", "ay", "az", "azm", "ba", "bab", "bah", "bak", "bb", "bc", "bd", "be", "bedr",
|
||||
"ben", "bf", "bg", "bh", "bij", "bis", "bk", "bl", "bldg", "blk", "bloc", "block", "bloco", "blok", "bm", "bmn",
|
||||
"bn", "bo", "boe", "bol", "bor", "bov", "box", "bp", "br", "bra", "brc", "bs", "bsa", "bu", "building", "bv", "bwn",
|
||||
"bx", "by", "cab", "cat", "cbi", "cbu", "cc", "ccz", "cd", "ce", "centre", "cfn", "cgc", "cjg", "cl", "club",
|
||||
"cottage", "cottages", "cso", "cum", "db", "dd", "df", "dia", "dvu", "ec", "ee", "eh", "em", "en", "esm", "ev",
|
||||
"fdo", "fer", "ff", "flat", "flats", "floor", "gar", "gara", "gas", "gb", "gg", "gr", "grg", "ha", "haus", "hh",
|
||||
"hl", "ho", "house", "hr", "hs", "hv", "ii", "iii", "int", "iv", "ix", "jab", "jf", "jj", "jms", "jtg", "ka", "kab",
|
||||
"kk", "kmb", "kmk", "knn", "koy", "kp", "kra", "ksn", "kud", "ldo", "ll", "local", "loja", "lot", "lote", "lsb",
|
||||
"lt", "mac", "mad", "mah", "mak", "mat", "mb", "mbb", "mbn", "mch", "mei", "mks", "mm", "mny", "mo", "mok", "mor",
|
||||
"msb", "mtj", "mtk", "mvd", "na", "ncc", "nij", "nn", "no", "nr", "nst", "nu", "nut", "of", "ofof", "old", "one",
|
||||
"oo", "opl", "pa", "pap", "pav", "pb", "pch", "pg", "ph", "phd", "pkf", "plot", "po", "pos", "pp", "pr", "pra",
|
||||
"pya", "qq", "quater", "ra", "rbo", "rear", "reisach", "rk", "rm", "rosso", "rs", "rw", "sab", "sal", "sav", "sb",
|
||||
"sba", "sbb", "sbl", "sbn", "sbx", "sc", "sch", "sco", "seb", "sep", "sf", "sgr", "sir", "sj", "sl", "sm", "sn",
|
||||
"snc", "som", "sp", "spi", "spn", "ss", "sta", "stc", "std", "stiege", "suite", "sur", "tam", "ter", "terrace",
|
||||
"tf", "th", "the", "tl", "to", "torre", "tr", "traf", "trd", "ts", "tt", "tu", "uhm", "unit", "utc", "vii", "wa",
|
||||
"wf", "wink", "wrh", "ws", "wsb", "xx", "za", "zh", "zona", "zu", "zw", "א", "ב", "ג", "α", "бб", "бл", "вл", "вх",
|
||||
"лит", "разр", "стр", "тп", "уч", "участок", "ა", "丁目", "之", "号", "號",
|
||||
|
||||
// List of exceptions
|
||||
"владение"};
|
||||
|
||||
// Common strings in house numbers.
|
||||
// To get this list, just run:
|
||||
//
|
||||
// ./clusterize-tag-values.lisp house-number path-to-taginfo-db.db > numbers.txt
|
||||
// tail -n +2 numbers.txt | head -78 | sed 's/^.*) \(.*\) \[.*$/"\1"/g;s/[ -/]//g;s/$/,/' |
|
||||
// sort | uniq
|
||||
vector<string> const g_patterns = {"BL", "BLN", "BLNSL", "BN", "BNL", "BNSL", "L", "LL", "LN", "LNL", "LNLN", "LNN",
|
||||
"N", "NBL", "NBLN", "NBN", "NBNBN", "NBNL", "NL", "NLBN", "NLL", "NLLN", "NLN",
|
||||
"NLNL", "NLS", "NLSN", "NN", "NNBN", "NNL", "NNLN", "NNN", "NNS", "NS", "NSN", "NSS",
|
||||
"S", "SL", "SLL", "SLN", "SN", "SNBNSS", "SNL", "SNN", "SS", "SSN", "SSS", "SSSS",
|
||||
|
||||
// List of exceptions
|
||||
"NNBNL"};
|
||||
|
||||
// List of patterns which look like house numbers more than other patterns. Constructed by hand.
|
||||
vector<string> const g_patternsStrict = {"N", "NBN", "NBL", "NL"};
|
||||
|
||||
// List of common synonyms for building parts. Constructed by hand.
|
||||
char const * g_buildingPartSynonyms[] = {"building", "bldg", "bld", "bl", "unit", "block", "blk", "корпус",
|
||||
"корп", "кор", "литер", "лит", "строение", "стр", "блок", "бл"};
|
||||
|
||||
// List of common stop words for buildings. Constructed by hand.
|
||||
UniString const g_stopWords[] = {MakeUniString("дом"), MakeUniString("house"), MakeUniString("д")};
|
||||
|
||||
bool IsStopWord(UniString const & s, bool isPrefix)
|
||||
{
|
||||
for (auto const & p : g_stopWords)
|
||||
if ((isPrefix && StartsWith(p, s)) || (!isPrefix && p == s))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
class BuildingPartSynonymsMatcher
|
||||
{
|
||||
public:
|
||||
using Synonyms = StringSet<UniChar, 4>;
|
||||
|
||||
BuildingPartSynonymsMatcher()
|
||||
{
|
||||
for (auto const & s : g_buildingPartSynonyms)
|
||||
{
|
||||
UniString const us = MakeUniString(s);
|
||||
m_synonyms.Add(us.begin(), us.end());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if |s| looks like a building-part synonym.
|
||||
inline bool Has(UniString const & s) const { return m_synonyms.Has(s.begin(), s.end()) == Synonyms::Status::Full; }
|
||||
|
||||
private:
|
||||
Synonyms m_synonyms;
|
||||
};
|
||||
|
||||
class StringsMatcher
|
||||
{
|
||||
public:
|
||||
using Strings = StringSet<UniChar, 8>;
|
||||
|
||||
StringsMatcher()
|
||||
{
|
||||
for (auto const & s : g_strings)
|
||||
{
|
||||
UniString const us = MakeUniString(s);
|
||||
m_strings.Add(us.begin(), us.end());
|
||||
}
|
||||
|
||||
for (auto const & s : g_buildingPartSynonyms)
|
||||
{
|
||||
UniString const us = MakeUniString(s);
|
||||
m_strings.Add(us.begin(), us.end());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when |s| may be a full substring of a house number,
|
||||
// or a prefix of some valid substring of a house number, when
|
||||
// |isPrefix| is true.
|
||||
bool Has(UniString const & s, bool isPrefix) const
|
||||
{
|
||||
auto const status = m_strings.Has(s.begin(), s.end());
|
||||
switch (status)
|
||||
{
|
||||
case Strings::Status::Absent: return false;
|
||||
case Strings::Status::Prefix: return isPrefix;
|
||||
case Strings::Status::Full: return true;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
private:
|
||||
Strings m_strings;
|
||||
};
|
||||
|
||||
class HouseNumberClassifier
|
||||
{
|
||||
public:
|
||||
using Patterns = StringSet<Token::Type, 4>;
|
||||
|
||||
HouseNumberClassifier(vector<string> const & patterns = g_patterns)
|
||||
{
|
||||
for (auto const & p : patterns)
|
||||
m_patterns.Add(make_transform_iterator(p.begin(), &CharToType), make_transform_iterator(p.end(), &CharToType));
|
||||
}
|
||||
|
||||
// Returns true when the string |s| looks like a valid house number,
|
||||
// (or a prefix of some valid house number, when |isPrefix| is
|
||||
// true).
|
||||
bool LooksGood(UniString const & s, bool isPrefix) const
|
||||
{
|
||||
TokensT parse;
|
||||
Tokenize(s, isPrefix, parse);
|
||||
|
||||
size_t i = 0;
|
||||
for (size_t j = 0; j != parse.size(); ++j)
|
||||
{
|
||||
auto const & token = parse[j];
|
||||
auto const type = token.m_type;
|
||||
switch (type)
|
||||
{
|
||||
case Token::TYPE_SEPARATOR: break;
|
||||
case Token::TYPE_GROUP_SEPARATOR: break;
|
||||
case Token::TYPE_HYPHEN: break;
|
||||
case Token::TYPE_SLASH: break;
|
||||
case Token::TYPE_STRING:
|
||||
{
|
||||
if (IsStopWord(token.m_value, token.m_prefix))
|
||||
break;
|
||||
if (!m_matcher.Has(token.m_value, token.m_prefix))
|
||||
return false;
|
||||
[[fallthrough]];
|
||||
}
|
||||
case Token::TYPE_LETTER:
|
||||
{
|
||||
if (j == 0 && IsStopWord(token.m_value, token.m_prefix))
|
||||
break;
|
||||
[[fallthrough]];
|
||||
}
|
||||
case Token::TYPE_NUMBER:
|
||||
case Token::TYPE_BUILDING_PART:
|
||||
case Token::TYPE_BUILDING_PART_OR_LETTER:
|
||||
parse[i] = std::move(parse[j]);
|
||||
ASSERT(!parse[i].m_value.empty(), ());
|
||||
++i;
|
||||
}
|
||||
}
|
||||
parse.resize(i);
|
||||
|
||||
auto const status = m_patterns.Has(make_transform_iterator(parse.begin(), &TokenToType),
|
||||
make_transform_iterator(parse.end(), &TokenToType));
|
||||
switch (status)
|
||||
{
|
||||
case Patterns::Status::Absent: return false;
|
||||
case Patterns::Status::Prefix: return true;
|
||||
case Patterns::Status::Full: return true;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
private:
|
||||
static Token::Type CharToType(char c)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case 'N': return Token::TYPE_NUMBER;
|
||||
case 'S': return Token::TYPE_STRING;
|
||||
case 'B': return Token::TYPE_BUILDING_PART;
|
||||
case 'L': return Token::TYPE_LETTER;
|
||||
case 'U': return Token::TYPE_BUILDING_PART_OR_LETTER;
|
||||
default: CHECK(false, ("Unexpected character:", c)); return Token::TYPE_SEPARATOR;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
static Token::Type TokenToType(Token const & token) { return token.m_type; }
|
||||
|
||||
StringsMatcher m_matcher;
|
||||
Patterns m_patterns;
|
||||
};
|
||||
|
||||
Token::Type GetCharType(UniChar c)
|
||||
{
|
||||
static UniString const kSeps = MakeUniString(" \t\"\\().#~");
|
||||
static UniString const kGroupSeps = MakeUniString(",|;+");
|
||||
|
||||
if (IsASCIIDigit(c))
|
||||
return Token::TYPE_NUMBER;
|
||||
if (find(kSeps.begin(), kSeps.end(), c) != kSeps.end())
|
||||
return Token::TYPE_SEPARATOR;
|
||||
if (find(kGroupSeps.begin(), kGroupSeps.end(), c) != kGroupSeps.end())
|
||||
return Token::TYPE_GROUP_SEPARATOR;
|
||||
if (c == '-')
|
||||
return Token::TYPE_HYPHEN;
|
||||
if (c == '/')
|
||||
return Token::TYPE_SLASH;
|
||||
return Token::TYPE_STRING;
|
||||
}
|
||||
|
||||
bool IsLiteralType(Token::Type type)
|
||||
{
|
||||
return type == Token::TYPE_STRING || type == Token::TYPE_LETTER || type == Token::TYPE_BUILDING_PART_OR_LETTER;
|
||||
}
|
||||
|
||||
// Leaves only numbers and letters, removes all trailing prefix
|
||||
// tokens. Then it does the following:
|
||||
//
|
||||
// * when there is at least one number, drops all tokens until the
|
||||
// number and sorts the rest
|
||||
// * when there are no numbers at all, sorts tokens
|
||||
void SimplifyParse(TokensT & tokens)
|
||||
{
|
||||
if (!tokens.empty() && tokens.back().m_prefix)
|
||||
tokens.pop_back();
|
||||
|
||||
size_t i = 0;
|
||||
size_t j = 0;
|
||||
while (j != tokens.size() && tokens[j].m_type != Token::TYPE_NUMBER)
|
||||
++j;
|
||||
for (; j != tokens.size(); ++j)
|
||||
{
|
||||
auto const type = tokens[j].m_type;
|
||||
if (type == Token::TYPE_NUMBER || type == Token::TYPE_LETTER)
|
||||
tokens[i++] = tokens[j];
|
||||
}
|
||||
|
||||
if (i != 0)
|
||||
{
|
||||
tokens.resize(i);
|
||||
sort(tokens.begin() + 1, tokens.end());
|
||||
}
|
||||
else
|
||||
{
|
||||
sort(tokens.begin(), tokens.end());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when a sequence denoted by [b2, e2) is a subsequence
|
||||
// of [b1, e1).
|
||||
template <typename T1, typename T2>
|
||||
bool IsSubsequence(T1 b1, T1 e1, T2 b2, T2 e2)
|
||||
{
|
||||
for (; b2 != e2; ++b1, ++b2)
|
||||
{
|
||||
while (b1 != e1 && *b1 < *b2)
|
||||
++b1;
|
||||
if (b1 == e1 || *b1 != *b2)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
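// Illustrative sketch (not part of the original file): both ranges must be sorted;
// {2, 5} is a subsequence of {1, 2, 3, 5}, while {2, 4} is not. The helper name is hypothetical.
bool ExampleIsSubsequence()
{
  int const a[] = {1, 2, 3, 5};
  int const b[] = {2, 5};
  return IsSubsequence(std::begin(a), std::end(a), std::begin(b), std::end(b));  // true
}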
|
||||
|
||||
bool IsBuildingPartSynonym(UniString const & s)
|
||||
{
|
||||
static BuildingPartSynonymsMatcher const kMatcher;
|
||||
return kMatcher.Has(s);
|
||||
}
|
||||
|
||||
bool IsShortBuildingSynonym(UniString const & t)
|
||||
{
|
||||
static UniString const kSynonyms[] = {MakeUniString("к"), MakeUniString("с")};
|
||||
for (auto const & s : kSynonyms)
|
||||
if (t == s)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void ForEachGroup(TokensT const & ts, Fn && fn)
|
||||
{
|
||||
size_t i = 0;
|
||||
while (i < ts.size())
|
||||
{
|
||||
while (i < ts.size() && ts[i].m_type == Token::TYPE_GROUP_SEPARATOR)
|
||||
++i;
|
||||
|
||||
size_t j = i;
|
||||
while (j < ts.size() && ts[j].m_type != Token::TYPE_GROUP_SEPARATOR)
|
||||
++j;
|
||||
|
||||
if (i != j)
|
||||
fn(i, j);
|
||||
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
void TransformString(UniString && token, Fn && fn)
|
||||
{
|
||||
static UniString const kLiter = MakeUniString("лит");
|
||||
|
||||
size_t const size = token.size();
|
||||
|
||||
if (IsBuildingPartSynonym(token))
|
||||
{
|
||||
fn(std::move(token), Token::TYPE_BUILDING_PART);
|
||||
}
|
||||
else if (size == 4 && StartsWith(token, kLiter))
|
||||
{
|
||||
fn(UniString(token.begin(), token.begin() + 3), Token::TYPE_BUILDING_PART);
|
||||
fn(UniString(token.begin() + 3, token.end()), Token::TYPE_LETTER);
|
||||
}
|
||||
else if (size == 2)
|
||||
{
|
||||
UniString firstLetter(token.begin(), token.begin() + 1);
|
||||
if (IsShortBuildingSynonym(firstLetter))
|
||||
{
|
||||
fn(std::move(firstLetter), Token::TYPE_BUILDING_PART);
|
||||
fn(UniString(token.begin() + 1, token.end()), Token::TYPE_LETTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
fn(std::move(token), Token::TYPE_STRING);
|
||||
}
|
||||
}
|
||||
else if (size == 1)
|
||||
{
|
||||
if (IsShortBuildingSynonym(token))
|
||||
fn(std::move(token), Token::TYPE_BUILDING_PART_OR_LETTER);
|
||||
else
|
||||
fn(std::move(token), Token::TYPE_LETTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
fn(std::move(token), Token::TYPE_STRING);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
uint64_t ToUInt(UniString const & s)
|
||||
{
|
||||
uint64_t res = 0;
|
||||
uint64_t pow = 1;
|
||||
|
||||
int i = int(s.size()) - 1;
|
||||
ASSERT(i >= 0 && i < std::numeric_limits<uint64_t>::digits10, (i));
|
||||
for (; i >= 0; --i)
|
||||
{
|
||||
ASSERT(IsASCIIDigit(s[i]), (s[i]));
|
||||
|
||||
res += (s[i] - '0') * pow;
|
||||
pow *= 10;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void Tokenize(UniString s, bool isPrefix, TokensT & ts)
|
||||
{
|
||||
MakeLowerCaseInplace(s);
|
||||
auto addToken = [&ts](UniString && value, Token::Type type) { ts.emplace_back(std::move(value), type); };
|
||||
|
||||
size_t i = 0;
|
||||
while (i < s.size())
|
||||
{
|
||||
Token::Type const type = GetCharType(s[i]);
|
||||
|
||||
size_t j = i + 1;
|
||||
while (j < s.size() && GetCharType(s[j]) == type)
|
||||
++j;
|
||||
|
||||
if (type != Token::TYPE_SEPARATOR)
|
||||
{
|
||||
UniString token(s.begin() + i, s.begin() + j);
|
||||
if (type == Token::TYPE_STRING)
|
||||
{
|
||||
if (j != s.size() || !isPrefix)
|
||||
{
|
||||
TransformString(std::move(token), addToken);
|
||||
}
|
||||
else if (i + 1 == j)
|
||||
{
|
||||
ts.emplace_back(std::move(token), Token::TYPE_LETTER);
|
||||
}
|
||||
else
|
||||
{
|
||||
ts.emplace_back(std::move(token), Token::TYPE_STRING);
|
||||
ts.back().m_prefix = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
addToken(std::move(token), type);
|
||||
}
|
||||
}
|
||||
|
||||
i = j;
|
||||
}
|
||||
|
||||
// Quite hacky reverse loop: i runs from ts.size() - 1 down to 0 and terminates via unsigned wrap-around.
|
||||
for (size_t i = ts.size() - 1; i < ts.size(); --i)
|
||||
{
|
||||
if (ts[i].m_type != Token::TYPE_BUILDING_PART_OR_LETTER)
|
||||
continue;
|
||||
if (i + 1 == ts.size() || ts[i + 1].m_type == Token::TYPE_BUILDING_PART)
|
||||
ts[i].m_type = Token::TYPE_LETTER;
|
||||
else if (ts[i + 1].m_type == Token::TYPE_NUMBER)
|
||||
ts[i].m_type = Token::TYPE_BUILDING_PART;
|
||||
}
|
||||
}
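// Illustrative sketch (not part of the original file): "12к3" tokenizes into the number 12,
// the short building-part synonym "к" and the number 3, i.e. the pattern NBN. The helper name is hypothetical.
void ExampleTokenize()
{
  TokensT tokens;
  Tokenize(MakeUniString("12к3"), false /* isPrefix */, tokens);
  // tokens[0]: TYPE_NUMBER "12", tokens[1]: TYPE_BUILDING_PART "к", tokens[2]: TYPE_NUMBER "3".
}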
|
||||
|
||||
void ParseHouseNumber(UniString const & s, vector<TokensT> & parses)
|
||||
{
|
||||
TokensT tokens;
|
||||
Tokenize(s, false /* isPrefix */, tokens);
|
||||
|
||||
bool numbersSequence = true;
|
||||
ForEachGroup(tokens, [&tokens, &numbersSequence](size_t i, size_t j)
|
||||
{
|
||||
switch (j - i)
|
||||
{
|
||||
case 0: break;
|
||||
case 1: numbersSequence = numbersSequence && tokens[i].m_type == Token::TYPE_NUMBER; break;
|
||||
case 2:
|
||||
numbersSequence =
|
||||
numbersSequence && tokens[i].m_type == Token::TYPE_NUMBER && IsLiteralType(tokens[i + 1].m_type);
|
||||
break;
|
||||
default: numbersSequence = false; break;
|
||||
}
|
||||
});
|
||||
|
||||
size_t const oldSize = parses.size();
|
||||
if (numbersSequence)
|
||||
{
|
||||
ForEachGroup(tokens, [&tokens, &parses](size_t i, size_t j)
|
||||
{
|
||||
parses.emplace_back();
|
||||
auto & parse = parses.back();
|
||||
for (size_t k = i; k < j; ++k)
|
||||
parse.emplace_back(std::move(tokens[k]));
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
parses.emplace_back(std::move(tokens));
|
||||
}
|
||||
|
||||
for (size_t i = oldSize; i < parses.size(); ++i)
|
||||
SimplifyParse(parses[i]);
|
||||
}
|
||||
|
||||
void ParseQuery(UniString const & query, bool queryIsPrefix, TokensT & parse)
|
||||
{
|
||||
Tokenize(query, queryIsPrefix, parse);
|
||||
SimplifyParse(parse);
|
||||
}
|
||||
|
||||
bool HouseNumbersMatch(UniString const & houseNumber, TokensT const & queryParse)
|
||||
{
|
||||
ASSERT(!houseNumber.empty() && !queryParse.empty(), ());
|
||||
|
||||
// Fast pre-check that allows an early exit without complex house number parsing.
|
||||
if (IsASCIIDigit(houseNumber[0]) && IsASCIIDigit(queryParse[0].m_value[0]) &&
|
||||
houseNumber[0] != queryParse[0].m_value[0])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<TokensT> houseNumberParses;
|
||||
ParseHouseNumber(houseNumber, houseNumberParses);
|
||||
|
||||
for (auto & parse : houseNumberParses)
|
||||
{
|
||||
if (parse.empty())
|
||||
continue;
|
||||
if (parse[0] == queryParse[0] &&
|
||||
(IsSubsequence(parse.begin() + 1, parse.end(), queryParse.begin() + 1, queryParse.end()) ||
|
||||
IsSubsequence(queryParse.begin() + 1, queryParse.end(), parse.begin() + 1, parse.end())))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
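// Illustrative sketch (not part of the original file): the query "12" matches the
// addr:housenumber value "12-14" (the leading numbers are equal and the remaining tokens form a
// subsequence), while a value starting with a different digit is rejected by the fast pre-check.
// The helper name is hypothetical.
bool ExampleHouseNumbersMatch()
{
  TokensT query;
  ParseQuery(MakeUniString("12"), false /* queryIsPrefix */, query);
  return HouseNumbersMatch(MakeUniString("12-14"), query);  // true
}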
|
||||
|
||||
bool HouseNumbersMatchConscription(UniString const & houseNumber, TokensT const & queryParse)
|
||||
{
|
||||
auto const beg = houseNumber.begin();
|
||||
auto const end = houseNumber.end();
|
||||
auto i = std::find(beg, end, '/');
|
||||
if (i != end)
|
||||
{
|
||||
// Conscription number / street number.
|
||||
return HouseNumbersMatch(UniString(beg, i), queryParse) || HouseNumbersMatch(UniString(i + 1, end), queryParse);
|
||||
}
|
||||
return HouseNumbersMatch(houseNumber, queryParse);
|
||||
}
|
||||
|
||||
bool HouseNumbersMatchRange(std::string_view const & hnRange, TokensT const & queryParse,
|
||||
feature::InterpolType interpol)
|
||||
{
|
||||
ASSERT(!queryParse.empty() && interpol != feature::InterpolType::None, ());
|
||||
|
||||
if (queryParse[0].m_type != Token::TYPE_NUMBER)
|
||||
return false;
|
||||
|
||||
uint64_t const val = ToUInt(queryParse[0].m_value);
|
||||
bool const isEven = (val % 2 == 0);
|
||||
if (interpol == feature::InterpolType::Odd && isEven)
|
||||
return false;
|
||||
if (interpol == feature::InterpolType::Even && !isEven)
|
||||
return false;
|
||||
|
||||
// Generator makes valid normalized values.
|
||||
size_t const i = hnRange.find(':');
|
||||
if (i == std::string_view::npos)
|
||||
{
|
||||
ASSERT(false, (hnRange));
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t left, right;
|
||||
if (!strings::to_uint(hnRange.substr(0, i), left) || !strings::to_uint(hnRange.substr(i + 1), right))
|
||||
{
|
||||
ASSERT(false, (hnRange));
|
||||
return false;
|
||||
}
|
||||
|
||||
return left < val && val < right;
|
||||
}
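// Illustrative sketch (not part of the original file): the query number 15 lies strictly inside
// the normalized interpolation range "10:20" and has the parity required by InterpolType::Odd.
// The helper name is hypothetical.
bool ExampleHouseNumbersMatchRange()
{
  TokensT query;
  ParseQuery(MakeUniString("15"), false /* queryIsPrefix */, query);
  return HouseNumbersMatchRange("10:20", query, feature::InterpolType::Odd);  // true
}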
|
||||
|
||||
bool LooksLikeHouseNumber(UniString const & s, bool isPrefix)
|
||||
{
|
||||
static HouseNumberClassifier const classifier;
|
||||
return classifier.LooksGood(s, isPrefix);
|
||||
}
|
||||
|
||||
bool LooksLikeHouseNumber(string const & s, bool isPrefix)
|
||||
{
|
||||
return LooksLikeHouseNumber(MakeUniString(s), isPrefix);
|
||||
}
|
||||
|
||||
bool LooksLikeHouseNumberStrict(UniString const & s)
|
||||
{
|
||||
static HouseNumberClassifier const classifier(g_patternsStrict);
|
||||
return classifier.LooksGood(s, false /* isPrefix */);
|
||||
}
|
||||
|
||||
bool LooksLikeHouseNumberStrict(string const & s)
|
||||
{
|
||||
return LooksLikeHouseNumberStrict(MakeUniString(s));
|
||||
}
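// Illustrative sketch (not part of the original file): "12к3" follows the NBN pattern and is
// accepted even by the strict classifier, whereas a string of unknown words is rejected.
// The helper name is hypothetical.
void ExampleLooksLikeHouseNumber()
{
  bool const strict = LooksLikeHouseNumberStrict("12к3");                         // true
  bool const loose = LooksLikeHouseNumber("hello world", false /* isPrefix */);   // false
  (void)strict;
  (void)loose;
}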
|
||||
|
||||
string DebugPrint(Token::Type type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case Token::TYPE_NUMBER: return "Number";
|
||||
case Token::TYPE_SEPARATOR: return "Separator";
|
||||
case Token::TYPE_GROUP_SEPARATOR: return "GroupSeparator";
|
||||
case Token::TYPE_HYPHEN: return "Hyphen";
|
||||
case Token::TYPE_SLASH: return "Slash";
|
||||
case Token::TYPE_STRING: return "String";
|
||||
case Token::TYPE_BUILDING_PART: return "BuildingPart";
|
||||
case Token::TYPE_LETTER: return "Letter";
|
||||
case Token::TYPE_BUILDING_PART_OR_LETTER: return "BuildingPartOrLetter";
|
||||
}
|
||||
return "Unknown";
|
||||
}
|
||||
|
||||
string DebugPrint(Token const & token)
|
||||
{
|
||||
ostringstream os;
|
||||
os << "Token [" << DebugPrint(token.m_value) << ", " << DebugPrint(token.m_type) << "]";
|
||||
return os.str();
|
||||
}
|
||||
} // namespace house_numbers
|
||||
} // namespace search
|
||||
88
libs/search/house_numbers_matcher.hpp
Normal file
88
libs/search/house_numbers_matcher.hpp
Normal file
|
|
@@ -0,0 +1,88 @@
|
|||
#pragma once
|
||||
|
||||
#include "indexer/feature_utils.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace house_numbers
|
||||
{
|
||||
struct Token
|
||||
{
|
||||
enum Type
|
||||
{
|
||||
TYPE_NUMBER,
|
||||
TYPE_SEPARATOR,
|
||||
TYPE_GROUP_SEPARATOR,
|
||||
TYPE_HYPHEN,
|
||||
TYPE_SLASH,
|
||||
TYPE_STRING,
|
||||
TYPE_BUILDING_PART,
|
||||
TYPE_LETTER,
|
||||
TYPE_BUILDING_PART_OR_LETTER
|
||||
};
|
||||
|
||||
Token() = default;
|
||||
Token(strings::UniString const & value, Type type) : m_value(value), m_type(type) {}
|
||||
Token(strings::UniString && value, Type type) : m_value(std::move(value)), m_type(type) {}
|
||||
Token(Token &&) = default;
|
||||
|
||||
Token & operator=(Token &&) = default;
|
||||
Token & operator=(Token const &) = default;
|
||||
|
||||
bool operator==(Token const & rhs) const { return m_type == rhs.m_type && m_value == rhs.m_value; }
|
||||
|
||||
bool operator!=(Token const & rhs) const { return !(*this == rhs); }
|
||||
|
||||
bool operator<(Token const & rhs) const
|
||||
{
|
||||
if (m_type != rhs.m_type)
|
||||
return m_type < rhs.m_type;
|
||||
return m_value < rhs.m_value;
|
||||
}
|
||||
|
||||
strings::UniString m_value;
|
||||
Type m_type = TYPE_SEPARATOR;
|
||||
bool m_prefix = false;
|
||||
};
|
||||
|
||||
using TokensT = std::vector<Token>;
|
||||
|
||||
// Used to convert a Token::TYPE_NUMBER value into an unsigned integer.
|
||||
uint64_t ToUInt(strings::UniString const & s);
|
||||
|
||||
// Tokenizes |s|, which may be a house number.
|
||||
void Tokenize(strings::UniString s, bool isPrefix, TokensT & ts);
|
||||
|
||||
// Parses a string that can be one or more house numbers. This method
|
||||
// can be used to parse addr:housenumber fields.
|
||||
void ParseHouseNumber(strings::UniString const & s, std::vector<TokensT> & parses);
|
||||
|
||||
// Parses a part of search query that can be a house number.
|
||||
void ParseQuery(strings::UniString const & query, bool queryIsPrefix, TokensT & parse);
|
||||
|
||||
/// @return true if the house number matches a given parsed query.
|
||||
/// @{
|
||||
bool HouseNumbersMatch(strings::UniString const & houseNumber, TokensT const & queryParse);
|
||||
bool HouseNumbersMatchConscription(strings::UniString const & houseNumber, TokensT const & queryParse);
|
||||
bool HouseNumbersMatchRange(std::string_view const & hnRange, TokensT const & queryParse,
|
||||
feature::InterpolType interpol);
|
||||
/// @}
|
||||
|
||||
// Returns true if |s| looks like a house number.
|
||||
bool LooksLikeHouseNumber(strings::UniString const & s, bool isPrefix);
|
||||
bool LooksLikeHouseNumber(std::string const & s, bool isPrefix);
|
||||
|
||||
bool LooksLikeHouseNumberStrict(strings::UniString const & s);
|
||||
bool LooksLikeHouseNumberStrict(std::string const & s);
|
||||
|
||||
std::string DebugPrint(Token::Type type);
|
||||
|
||||
std::string DebugPrint(Token const & token);
|
||||
} // namespace house_numbers
|
||||
} // namespace search
|
||||
156
libs/search/house_to_street_table.cpp
Normal file
156
libs/search/house_to_street_table.cpp
Normal file
|
|
@@ -0,0 +1,156 @@
|
|||
#include "search/house_to_street_table.hpp"
|
||||
|
||||
#include "indexer/mwm_set.hpp"
|
||||
|
||||
#include "platform/mwm_traits.hpp"
|
||||
|
||||
#include "coding/files_container.hpp"
|
||||
#include "coding/map_uint32_to_val.hpp"
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/checked_cast.hpp"
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#include "defines.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace search
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
class EliasFanoMap : public HouseToStreetTable
|
||||
{
|
||||
public:
|
||||
using Map = MapUint32ToValue<uint32_t>;
|
||||
|
||||
explicit EliasFanoMap(unique_ptr<Reader> && reader) : m_reader(std::move(reader))
|
||||
{
|
||||
ASSERT(m_reader, ());
|
||||
auto readBlockCallback = [](auto & source, uint32_t blockSize, vector<uint32_t> & values)
|
||||
{
|
||||
values.resize(blockSize);
|
||||
values[0] = ReadVarUint<uint32_t>(source);
|
||||
|
||||
for (size_t i = 1; i < blockSize && source.Size() > 0; ++i)
|
||||
{
|
||||
// Feature ids for all real features are less than numeric_limits<int32_t>::max()
|
||||
// so we can use delta coding with int32_t difference type.
|
||||
values[i] = base::asserted_cast<uint32_t>(values[i - 1] + ReadVarInt<int32_t>(source));
|
||||
}
|
||||
};
|
||||
|
||||
m_map = Map::Load(*m_reader, readBlockCallback);
|
||||
ASSERT(m_map.get(), ());
|
||||
}
|
||||
|
||||
// HouseToStreetTable overrides:
|
||||
std::optional<Result> Get(uint32_t houseId) const override
|
||||
{
|
||||
uint32_t fID;
|
||||
if (!m_map->Get(houseId, fID))
|
||||
return {};
|
||||
return {{fID, StreetIdType::FeatureId}};
|
||||
}
|
||||
|
||||
private:
|
||||
unique_ptr<Reader> m_reader;
|
||||
unique_ptr<Map> m_map;
|
||||
};
|
||||
|
||||
class DummyTable : public HouseToStreetTable
|
||||
{
|
||||
public:
|
||||
// HouseToStreetTable overrides:
|
||||
std::optional<Result> Get(uint32_t /* houseId */) const override { return {}; }
|
||||
};
|
||||
|
||||
unique_ptr<HouseToStreetTable> LoadHouseTableImpl(MwmValue const & value, std::string const & tag)
|
||||
{
|
||||
unique_ptr<HouseToStreetTable> result;
|
||||
|
||||
try
|
||||
{
|
||||
auto const format = version::MwmTraits(value.GetMwmVersion()).GetHouseToStreetTableFormat();
|
||||
CHECK_EQUAL(format, version::MwmTraits::HouseToStreetTableFormat::HouseToStreetTableWithHeader, ());
|
||||
|
||||
FilesContainerR::TReader reader = value.m_cont.GetReader(tag);
|
||||
|
||||
HouseToStreetTable::Header header;
|
||||
ReaderSource source(reader);
|
||||
header.Read(source);
|
||||
CHECK(header.m_version == HouseToStreetTable::Version::V2, ());
|
||||
|
||||
auto subreader = reader.GetPtr()->CreateSubReader(header.m_tableOffset, header.m_tableSize);
|
||||
CHECK(subreader, ());
|
||||
result = make_unique<EliasFanoMap>(std::move(subreader));
|
||||
}
|
||||
catch (Reader::OpenException const & ex)
|
||||
{
|
||||
LOG(LERROR, (ex.Msg()));
|
||||
}
|
||||
|
||||
if (!result)
|
||||
result = make_unique<DummyTable>();
|
||||
return result;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<HouseToStreetTable> LoadHouseToStreetTable(MwmValue const & value)
|
||||
{
|
||||
return LoadHouseTableImpl(value, FEATURE2STREET_FILE_TAG);
|
||||
}
|
||||
|
||||
std::unique_ptr<HouseToStreetTable> LoadHouseToPlaceTable(MwmValue const & value)
|
||||
{
|
||||
return LoadHouseTableImpl(value, FEATURE2PLACE_FILE_TAG);
|
||||
}
|
||||
|
||||
// HouseToStreetTableBuilder -----------------------------------------------------------------------
|
||||
void HouseToStreetTableBuilder::Put(uint32_t houseId, uint32_t streetId)
|
||||
{
|
||||
m_builder.Put(houseId, streetId);
|
||||
}
|
||||
|
||||
void HouseToStreetTableBuilder::Freeze(Writer & writer) const
|
||||
{
|
||||
uint64_t const startOffset = writer.Pos();
|
||||
CHECK(coding::IsAlign8(startOffset), ());
|
||||
|
||||
HouseToStreetTable::Header header;
|
||||
header.Serialize(writer);
|
||||
|
||||
uint64_t bytesWritten = writer.Pos();
|
||||
coding::WritePadding(writer, bytesWritten);
|
||||
|
||||
// Each street id is encoded as a delta from some predicted value.
// The first street id in a block is written as VarUint; every other street id in the block
// is written as a VarInt delta from the previous id.
|
||||
auto const writeBlockCallback = [](auto & w, auto begin, auto end)
|
||||
{
|
||||
CHECK(begin != end, ());
|
||||
WriteVarUint(w, *begin);
|
||||
auto prevIt = begin;
|
||||
for (auto it = begin + 1; it != end; ++it)
|
||||
{
|
||||
int32_t const delta = base::asserted_cast<int32_t>(*it) - *prevIt;
|
||||
WriteVarInt(w, delta);
|
||||
prevIt = it;
|
||||
}
|
||||
};
|
||||
|
||||
header.m_tableOffset = base::asserted_cast<uint32_t>(writer.Pos() - startOffset);
|
||||
m_builder.Freeze(writer, writeBlockCallback);
|
||||
header.m_tableSize = base::asserted_cast<uint32_t>(writer.Pos() - header.m_tableOffset - startOffset);
|
||||
|
||||
auto const endOffset = writer.Pos();
|
||||
writer.Seek(startOffset);
|
||||
header.Serialize(writer);
|
||||
writer.Seek(endOffset);
|
||||
}
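// Illustrative sketch (not part of the original file): building a table in memory. MemWriter over
// a byte vector is assumed to be available from coding/writer.hpp; the helper name is hypothetical.
void ExampleBuildHouseToStreetTable(std::vector<uint8_t> & buffer)
{
  HouseToStreetTableBuilder builder;
  builder.Put(10 /* houseId */, 100 /* streetId */);
  builder.Put(12 /* houseId */, 100 /* streetId */);
  builder.Put(20 /* houseId */, 105 /* streetId */);

  MemWriter<std::vector<uint8_t>> writer(buffer);
  builder.Freeze(writer);
  // Within one block the street ids are stored as 100 followed by the VarInt deltas 0 and +5.
}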
|
||||
} // namespace search
|
||||
25
libs/search/house_to_street_table.hpp
Normal file
25
libs/search/house_to_street_table.hpp
Normal file
|
|
@@ -0,0 +1,25 @@
|
|||
#pragma once
|
||||
#include "indexer/house_to_street_iface.hpp"
|
||||
|
||||
#include "coding/map_uint32_to_val.hpp"
|
||||
|
||||
#include <memory>
|
||||
|
||||
class MwmValue;
|
||||
class Writer;
|
||||
|
||||
namespace search
|
||||
{
|
||||
std::unique_ptr<HouseToStreetTable> LoadHouseToStreetTable(MwmValue const & value);
|
||||
std::unique_ptr<HouseToStreetTable> LoadHouseToPlaceTable(MwmValue const & value);
|
||||
|
||||
class HouseToStreetTableBuilder
|
||||
{
|
||||
public:
|
||||
void Put(uint32_t featureId, uint32_t offset);
|
||||
void Freeze(Writer & writer) const;
|
||||
|
||||
private:
|
||||
MapUint32ToValueBuilder<uint32_t> m_builder;
|
||||
};
|
||||
} // namespace search
|
||||
24
libs/search/idf_map.cpp
Normal file
24
libs/search/idf_map.cpp
Normal file
@@ -0,0 +1,24 @@
#include "search/idf_map.hpp"

#include "base/assert.hpp"

namespace search
{
IdfMap::IdfMap(Delegate const & delegate, double unknownIdf) : m_delegate(delegate), m_unknownIdf(unknownIdf)
{
  ASSERT_GREATER(m_unknownIdf, 0.0, ());
}

double IdfMap::GetImpl(Map & idfs, strings::UniString const & s, bool isPrefix)
{
  auto const it = idfs.find(s);
  if (it != idfs.cend())
    return it->second;

  auto const df = static_cast<double>(m_delegate.GetNumDocs(s, isPrefix));
  auto const idf = df == 0 ? m_unknownIdf : 1.0 / df;
  idfs[s] = idf;

  return idf;
}
} // namespace search
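// Illustrative sketch (not part of the original file): a delegate that reports a document
// frequency of 4 for every token, so its IDF is 1.0 / 4; a delegate returning 0 would make
// Get() fall back to the |unknownIdf| passed to the constructor. Both names below are hypothetical.
namespace search
{
struct ConstDfDelegate : public IdfMap::Delegate
{
  uint64_t GetNumDocs(strings::UniString const & /* token */, bool /* isPrefix */) const override { return 4; }
};

double ExampleIdf()
{
  ConstDfDelegate delegate;
  IdfMap idfs(delegate, 1.0 /* unknownIdf */);
  return idfs.Get(strings::MakeUniString("token"), false /* isPrefix */);  // 0.25
}
} // namespace search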
38
libs/search/idf_map.hpp
Normal file
38
libs/search/idf_map.hpp
Normal file
|
|
@@ -0,0 +1,38 @@
|
|||
#pragma once
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
|
||||
namespace search
|
||||
{
|
||||
class IdfMap
|
||||
{
|
||||
public:
|
||||
struct Delegate
|
||||
{
|
||||
virtual ~Delegate() = default;
|
||||
|
||||
virtual uint64_t GetNumDocs(strings::UniString const & token, bool isPrefix) const = 0;
|
||||
};
|
||||
|
||||
IdfMap(Delegate const & delegate, double unknownIdf);
|
||||
|
||||
double Get(strings::UniString const & s, bool isPrefix)
|
||||
{
|
||||
return GetImpl(isPrefix ? m_prefixIdfs : m_fullIdfs, s, isPrefix);
|
||||
}
|
||||
|
||||
private:
|
||||
using Map = std::map<strings::UniString, double>;
|
||||
|
||||
double GetImpl(Map & idfs, strings::UniString const & s, bool isPrefix);
|
||||
|
||||
Map m_fullIdfs;
|
||||
Map m_prefixIdfs;
|
||||
|
||||
Delegate const & m_delegate;
|
||||
double m_unknownIdf;
|
||||
};
|
||||
} // namespace search
|
||||
349
libs/search/intermediate_result.cpp
Normal file
349
libs/search/intermediate_result.cpp
Normal file
|
|
@@ -0,0 +1,349 @@
|
|||
#include "search/intermediate_result.hpp"
|
||||
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
|
||||
#include "storage/country_info_getter.hpp"
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
#include "indexer/feature.hpp"
|
||||
#include "indexer/feature_algo.hpp"
|
||||
#include "indexer/feature_utils.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
#include "indexer/road_shields_parser.hpp"
|
||||
|
||||
#include "platform/localization.hpp"
|
||||
#include "platform/measurement_utils.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "3party/opening_hours/opening_hours.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
class SkipRegionInfo
|
||||
{
|
||||
static size_t constexpr kCount = 2;
|
||||
uint32_t m_types[kCount];
|
||||
|
||||
public:
|
||||
SkipRegionInfo()
|
||||
{
|
||||
base::StringIL arr[] = {{"place", "continent"}, {"place", "country"}};
|
||||
static_assert(kCount == ARRAY_SIZE(arr), "");
|
||||
|
||||
Classificator const & c = classif();
|
||||
for (size_t i = 0; i < kCount; ++i)
|
||||
m_types[i] = c.GetTypeByPath(arr[i]);
|
||||
}
|
||||
|
||||
bool IsSkip(uint32_t type) const
|
||||
{
|
||||
for (uint32_t t : m_types)
|
||||
if (t == type)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// PreRankerResult ---------------------------------------------------------------------------------
|
||||
PreRankerResult::PreRankerResult(FeatureID const & id, PreRankingInfo const & info,
|
||||
vector<ResultTracer::Branch> const & provenance)
|
||||
: m_id(id)
|
||||
, m_info(info)
|
||||
, m_isRelaxed(base::IsExist(provenance, ResultTracer::Branch::Relaxed))
|
||||
#ifdef SEARCH_USE_PROVENANCE
|
||||
, m_provenance(provenance)
|
||||
#endif
|
||||
{
|
||||
ASSERT(m_id.IsValid(), ());
|
||||
|
||||
m_matchedTokensNumber = 0;
|
||||
for (auto const & r : m_info.m_tokenRanges)
|
||||
m_matchedTokensNumber += r.Size();
|
||||
}
|
||||
|
||||
// static
|
||||
bool PreRankerResult::LessRankAndPopularity(PreRankerResult const & lhs, PreRankerResult const & rhs)
|
||||
{
|
||||
if (lhs.m_info.m_rank != rhs.m_info.m_rank)
|
||||
return lhs.m_info.m_rank > rhs.m_info.m_rank;
|
||||
if (lhs.m_info.m_popularity != rhs.m_info.m_popularity)
|
||||
return lhs.m_info.m_popularity > rhs.m_info.m_popularity;
|
||||
|
||||
/// @todo Remove this epilogue when we have _enough_ ranks and popularities in the data.
|
||||
return lhs.m_info.m_distanceToPivot < rhs.m_info.m_distanceToPivot;
|
||||
}
|
||||
|
||||
// static
|
||||
bool PreRankerResult::LessDistance(PreRankerResult const & lhs, PreRankerResult const & rhs)
|
||||
{
|
||||
return lhs.m_info.m_distanceToPivot < rhs.m_info.m_distanceToPivot;
|
||||
}
|
||||
|
||||
// static
|
||||
int PreRankerResult::CompareByTokensMatch(PreRankerResult const & lhs, PreRankerResult const & rhs)
|
||||
{
|
||||
if (lhs.m_info.m_isCommonMatchOnly != rhs.m_info.m_isCommonMatchOnly)
|
||||
return rhs.m_info.m_isCommonMatchOnly ? -1 : 1;
|
||||
|
||||
auto const & lRange = lhs.m_info.InnermostTokenRange();
|
||||
auto const & rRange = rhs.m_info.InnermostTokenRange();
|
||||
|
||||
if (lRange.Size() != rRange.Size())
|
||||
return lRange.Size() > rRange.Size() ? -1 : 1;
|
||||
|
||||
if (lhs.m_matchedTokensNumber != rhs.m_matchedTokensNumber)
|
||||
return lhs.m_matchedTokensNumber > rhs.m_matchedTokensNumber ? -1 : 1;
|
||||
|
||||
if (lRange.Begin() != rRange.Begin())
|
||||
return lRange.Begin() < rRange.Begin() ? -1 : 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// static
|
||||
bool PreRankerResult::LessByExactMatch(PreRankerResult const & lhs, PreRankerResult const & rhs)
|
||||
{
|
||||
bool const lScore = lhs.m_info.m_exactMatch && lhs.m_info.m_allTokensUsed;
|
||||
bool const rScore = rhs.m_info.m_exactMatch && rhs.m_info.m_allTokensUsed;
|
||||
if (lScore != rScore)
|
||||
return lScore;
|
||||
|
||||
return CompareByTokensMatch(lhs, rhs) == -1;
|
||||
}
|
||||
|
||||
bool PreRankerResult::CategoriesComparator::operator()(PreRankerResult const & lhs, PreRankerResult const & rhs) const
|
||||
{
|
||||
if (m_positionIsInsideViewport)
|
||||
return lhs.GetDistance() < rhs.GetDistance();
|
||||
|
||||
if (m_detailedScale)
|
||||
{
|
||||
bool const lhsInside = m_viewport.IsPointInside(lhs.GetInfo().m_center);
|
||||
bool const rhsInside = m_viewport.IsPointInside(rhs.GetInfo().m_center);
|
||||
if (lhsInside && !rhsInside)
|
||||
return true;
|
||||
if (rhsInside && !lhsInside)
|
||||
return false;
|
||||
}
|
||||
return lhs.GetPopularity() > rhs.GetPopularity();
|
||||
}
|
||||
|
||||
std::string DebugPrint(PreRankerResult const & r)
|
||||
{
|
||||
ostringstream os;
|
||||
os << "PreRankerResult "
|
||||
<< "{ FID: " << r.GetId().m_index // index is enough here for debug purpose
|
||||
<< "; m_matchedTokensNumber: " << r.m_matchedTokensNumber << "; m_isRelaxed: " << r.m_isRelaxed << "; "
|
||||
<< DebugPrint(r.m_info) << " }";
|
||||
return os.str();
|
||||
}
|
||||
|
||||
// RankerResult ------------------------------------------------------------------------------------
|
||||
RankerResult::RankerResult(FeatureType & ft, m2::PointD const & center, string displayName, string const & fileName)
|
||||
: m_types(ft)
|
||||
, m_str(std::move(displayName))
|
||||
, m_id(ft.GetID())
|
||||
, m_resultType(ftypes::IsBuildingChecker::Instance()(m_types) ? Type::Building : Type::Feature)
|
||||
, m_geomType(ft.GetGeomType())
|
||||
{
|
||||
ASSERT(m_id.IsValid(), ());
|
||||
ASSERT(!m_types.Empty(), ());
|
||||
|
||||
m_types.SortBySpec();
|
||||
|
||||
m_region.SetParams(fileName, center);
|
||||
|
||||
FillDetails(ft, m_str, m_details);
|
||||
}
|
||||
|
||||
RankerResult::RankerResult(FeatureType & ft, std::string const & fileName)
|
||||
: RankerResult(ft, feature::GetCenter(ft, FeatureType::WORST_GEOMETRY), std::string(ft.GetReadableName()), fileName)
|
||||
{}
|
||||
|
||||
RankerResult::RankerResult(double lat, double lon)
|
||||
: m_str("(" + measurement_utils::FormatLatLon(lat, lon) + ")")
|
||||
, m_resultType(Type::LatLon)
|
||||
{
|
||||
m_region.SetParams({}, mercator::FromLatLon(lat, lon));
|
||||
}
|
||||
|
||||
RankerResult::RankerResult(m2::PointD const & coord, string_view postcode)
|
||||
: m_str(postcode)
|
||||
, m_resultType(Type::Postcode)
|
||||
{
|
||||
strings::AsciiToUpper(m_str);
|
||||
m_region.SetParams({}, coord);
|
||||
}
|
||||
|
||||
bool RankerResult::GetCountryId(storage::CountryInfoGetter const & infoGetter, uint32_t ftype,
|
||||
storage::CountryId & countryId) const
|
||||
{
|
||||
static SkipRegionInfo const checker;
|
||||
if (checker.IsSkip(ftype))
|
||||
return false;
|
||||
return m_region.GetCountryId(infoGetter, countryId);
|
||||
}
|
||||
|
||||
bool RankerResult::IsEqualBasic(RankerResult const & r) const
|
||||
{
|
||||
return (m_geomType == r.m_geomType && GetRankingInfo().m_type == r.GetRankingInfo().m_type && m_str == r.m_str);
|
||||
}
|
||||
|
||||
bool RankerResult::IsEqualCommon(RankerResult const & r) const
|
||||
{
|
||||
return (IsEqualBasic(r) && GetBestType() == r.GetBestType());
|
||||
}
|
||||
|
||||
bool RankerResult::IsStreet() const
|
||||
{
|
||||
return ftypes::IsStreetOrSquareChecker::Instance()(m_types);
|
||||
}
|
||||
|
||||
uint32_t RankerResult::GetBestType(vector<uint32_t> const * preferredTypes /* = nullptr */) const
|
||||
{
|
||||
if (preferredTypes)
|
||||
{
|
||||
ASSERT(is_sorted(preferredTypes->begin(), preferredTypes->end()), ());
|
||||
for (uint32_t type : m_types)
|
||||
if (binary_search(preferredTypes->begin(), preferredTypes->end(), type))
|
||||
return type;
|
||||
}
|
||||
|
||||
return m_types.GetBestType();
|
||||
}
|
||||
|
||||
// RankerResult::RegionInfo ------------------------------------------------------------------------
|
||||
bool RankerResult::RegionInfo::GetCountryId(storage::CountryInfoGetter const & infoGetter,
|
||||
storage::CountryId & countryId) const
|
||||
{
|
||||
if (!m_countryId.empty())
|
||||
{
|
||||
countryId = m_countryId;
|
||||
return true;
|
||||
}
|
||||
|
||||
auto const id = infoGetter.GetRegionCountryId(m_point);
|
||||
if (id != storage::kInvalidCountryId)
|
||||
{
|
||||
countryId = id;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Functions ---------------------------------------------------------------------------------------
|
||||
void FillDetails(FeatureType & ft, std::string const & name, Result::Details & details)
|
||||
{
|
||||
if (details.m_isInitialized)
|
||||
return;
|
||||
|
||||
std::string_view airportIata = ft.GetMetadata(feature::Metadata::FMD_AIRPORT_IATA);
|
||||
|
||||
std::string brand{ft.GetMetadata(feature::Metadata::FMD_BRAND)};
|
||||
if (!brand.empty())
|
||||
{
|
||||
brand = platform::GetLocalizedBrandName(brand);
|
||||
|
||||
if (name.find(brand) != std::string::npos)
|
||||
brand.clear();
|
||||
}
|
||||
|
||||
/// @todo Avoid the temporary string when OpeningHours (boost::spirit) allows string_view.
|
||||
std::string const openHours(ft.GetMetadata(feature::Metadata::FMD_OPEN_HOURS));
|
||||
if (!openHours.empty())
|
||||
{
|
||||
using namespace osmoh;
|
||||
OpeningHours const oh((std::string(openHours)));
|
||||
if (oh.IsValid())
|
||||
{
|
||||
/// @todo We should check closed/open time for specific feature's timezone.
|
||||
time_t const now = time(nullptr);
|
||||
auto const info = oh.GetInfo(now);
|
||||
if (info.state != RuleState::Unknown)
|
||||
{
|
||||
// Otherwise the value stays osm::Unknown, as set in the preview's constructor.
|
||||
details.m_isOpenNow = (info.state == RuleState::Open) ? osm::Yes : osm::No;
|
||||
|
||||
details.m_minutesUntilOpen = (info.nextTimeOpen - now) / 60;
|
||||
details.m_minutesUntilClosed = (info.nextTimeClosed - now) / 60;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
feature::TypesHolder const typesHolder(ft);
|
||||
|
||||
std::string stars;
|
||||
uint8_t starsCount = 0;
|
||||
bool const isHotel = ftypes::IsHotelChecker::Instance()(typesHolder);
|
||||
if (isHotel && strings::to_uint(ft.GetMetadata(feature::Metadata::FMD_STARS), starsCount))
|
||||
stars = feature::FormatStars(starsCount);
|
||||
|
||||
auto const cuisines = feature::GetLocalizedCuisines(typesHolder);
|
||||
auto const cuisine = strings::JoinStrings(cuisines, feature::kFieldsSeparator);
|
||||
|
||||
auto const recycling =
|
||||
strings::JoinStrings(feature::GetLocalizedRecyclingTypes(typesHolder), feature::kFieldsSeparator);
|
||||
|
||||
auto const roadShields = ftypes::GetRoadShieldsNames(ft);
|
||||
auto const roadShield = strings::JoinStrings(roadShields, feature::kFieldsSeparator);
|
||||
|
||||
auto const fee = feature::GetLocalizedFeeType(typesHolder);
|
||||
|
||||
auto const elevation = feature::FormatElevation(ft.GetMetadata(feature::Metadata::FMD_ELE));
|
||||
|
||||
std::string description;
|
||||
|
||||
auto const append = [&description](std::string_view sv)
|
||||
{
|
||||
if (sv.empty())
|
||||
return;
|
||||
if (!description.empty())
|
||||
description += feature::kFieldsSeparator;
|
||||
description += sv;
|
||||
};
|
||||
|
||||
append(stars);
|
||||
append(airportIata);
|
||||
append(roadShield);
|
||||
append(brand);
|
||||
append(elevation);
|
||||
append(cuisine);
|
||||
append(recycling);
|
||||
append(fee);
|
||||
|
||||
details.m_description = std::move(description);
|
||||
|
||||
details.m_isInitialized = true;
|
||||
}
|
||||
|
||||
string DebugPrint(RankerResult const & r)
|
||||
{
|
||||
stringstream ss;
|
||||
ss << "RankerResult "
|
||||
<< "{ FID: " << r.GetID().m_index // index is enough here for debug purpose
|
||||
<< "; Name: " << r.GetName() << "; Type: " << classif().GetReadableObjectName(r.GetBestType())
|
||||
<< "; Linear model rank: " << r.GetLinearModelRank();
|
||||
|
||||
#ifdef SEARCH_USE_PROVENANCE
|
||||
if (!r.m_provenance.empty())
|
||||
ss << "; Provenance: " << ::DebugPrint(r.m_provenance);
|
||||
#endif
|
||||
|
||||
if (r.m_dbgInfo)
|
||||
ss << "; " << DebugPrint(*r.m_dbgInfo);
|
||||
else
|
||||
ss << "; " << DebugPrint(r.GetRankingInfo());
|
||||
|
||||
ss << " }";
|
||||
return ss.str();
|
||||
}
|
||||
} // namespace search
|
||||
194
libs/search/intermediate_result.hpp
Normal file
194
libs/search/intermediate_result.hpp
Normal file
|
|
@@ -0,0 +1,194 @@
|
|||
#pragma once
|
||||
|
||||
#include "search/pre_ranking_info.hpp"
|
||||
#include "search/ranking_info.hpp"
|
||||
#include "search/result.hpp"
|
||||
#include "search/tracer.hpp"
|
||||
|
||||
#include "storage/storage_defines.hpp"
|
||||
|
||||
#include "indexer/feature_data.hpp"
|
||||
|
||||
#include "geometry/point2d.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class FeatureType;
|
||||
|
||||
namespace storage
|
||||
{
|
||||
class CountryInfoGetter;
|
||||
struct CountryInfo;
|
||||
} // namespace storage
|
||||
|
||||
namespace search
|
||||
{
|
||||
class ReverseGeocoder;
|
||||
|
||||
// First-pass result class. Objects are created during the search in the trie.
|
||||
// Works fast because it does not load features.
|
||||
class PreRankerResult
|
||||
{
|
||||
public:
|
||||
PreRankerResult(FeatureID const & id, PreRankingInfo const & info,
|
||||
std::vector<ResultTracer::Branch> const & provenance);
|
||||
|
||||
/// @name Compare functions.
|
||||
/// @return true (-1) if lhs is better (less in sort) than rhs.
|
||||
/// @{
|
||||
static bool LessRankAndPopularity(PreRankerResult const & lhs, PreRankerResult const & rhs);
|
||||
static bool LessDistance(PreRankerResult const & lhs, PreRankerResult const & rhs);
|
||||
static int CompareByTokensMatch(PreRankerResult const & lhs, PreRankerResult const & rhs);
|
||||
static bool LessByExactMatch(PreRankerResult const & lhs, PreRankerResult const & rhs);
|
||||
/// @}
|
||||
|
||||
struct CategoriesComparator
|
||||
{
|
||||
bool operator()(PreRankerResult const & lhs, PreRankerResult const & rhs) const;
|
||||
|
||||
m2::RectD m_viewport;
|
||||
bool m_positionIsInsideViewport = false;
|
||||
bool m_detailedScale = false;
|
||||
};
|
||||
|
||||
FeatureID const & GetId() const { return m_id; }
|
||||
double GetDistance() const { return m_info.m_distanceToPivot; }
|
||||
uint8_t GetRank() const { return m_info.m_rank; }
|
||||
uint8_t GetPopularity() const { return m_info.m_popularity; }
|
||||
PreRankingInfo const & GetInfo() const { return m_info; }
|
||||
|
||||
#ifdef SEARCH_USE_PROVENANCE
|
||||
std::vector<ResultTracer::Branch> const & GetProvenance() const { return m_provenance; }
|
||||
#endif
|
||||
|
||||
// size_t GetInnermostTokensNumber() const { return m_info.InnermostTokenRange().Size(); }
|
||||
// size_t GetMatchedTokensNumber() const { return m_matchedTokensNumber; }
|
||||
bool IsNotRelaxed() const { return !m_isRelaxed; }
|
||||
|
||||
bool SkipForViewportSearch(size_t queryTokensNumber) const
|
||||
{
|
||||
return m_isRelaxed || m_matchedTokensNumber + 1 < queryTokensNumber;
|
||||
}
|
||||
|
||||
void SetRank(uint8_t rank) { m_info.m_rank = rank; }
|
||||
void SetPopularity(uint8_t popularity) { m_info.m_popularity = popularity; }
|
||||
void SetDistanceToPivot(double distance) { m_info.m_distanceToPivot = distance; }
|
||||
void SetCenter(m2::PointD const & center)
|
||||
{
|
||||
m_info.m_center = center;
|
||||
m_info.m_centerLoaded = true;
|
||||
}
|
||||
|
||||
friend std::string DebugPrint(PreRankerResult const & r);
|
||||
|
||||
private:
|
||||
FeatureID m_id;
|
||||
PreRankingInfo m_info;
|
||||
|
||||
size_t m_matchedTokensNumber;
|
||||
bool m_isRelaxed;
|
||||
|
||||
#ifdef SEARCH_USE_PROVENANCE
|
||||
// The call path in the Geocoder that leads to this result.
|
||||
std::vector<ResultTracer::Branch> m_provenance;
|
||||
#endif
|
||||
};
|
||||
|
||||
// Second-pass result class. Objects are created while reading features.
// It reads and fills the info needed for ranking and for producing the final results.
|
||||
class RankerResult
|
||||
{
|
||||
public:
|
||||
enum class Type : uint8_t
|
||||
{
|
||||
LatLon = 0,
|
||||
Feature,
|
||||
Building, //!< Buildings are not filtered out in duplicates filter.
|
||||
Postcode
|
||||
};
|
||||
|
||||
/// For Type::Feature and Type::Building.
|
||||
RankerResult(FeatureType & ft, m2::PointD const & center, std::string displayName, std::string const & fileName);
|
||||
RankerResult(FeatureType & ft, std::string const & fileName);
|
||||
|
||||
/// For Type::LatLon.
|
||||
RankerResult(double lat, double lon);
|
||||
|
||||
/// For Type::Postcode.
|
||||
RankerResult(m2::PointD const & coord, std::string_view postcode);
|
||||
|
||||
bool IsStreet() const;
|
||||
|
||||
StoredRankingInfo const & GetRankingInfo() const { return m_info; }
|
||||
void SetRankingInfo(RankingInfo const & info, bool viewportMode)
|
||||
{
|
||||
m_finalRank = info.GetLinearModelRank(viewportMode);
|
||||
m_info = info;
|
||||
}
|
||||
|
||||
FeatureID const & GetID() const { return m_id; }
|
||||
std::string const & GetName() const { return m_str; }
|
||||
feature::TypesHolder const & GetTypes() const { return m_types; }
|
||||
Type GetResultType() const { return m_resultType; }
|
||||
m2::PointD GetCenter() const { return m_region.m_point; }
|
||||
feature::GeomType GetGeomType() const { return m_geomType; }
|
||||
Result::Details GetDetails() const { return m_details; }
|
||||
|
||||
double GetDistanceToPivot() const { return m_info.m_distanceToPivot; }
|
||||
double GetLinearModelRank() const { return m_finalRank; }
|
||||
|
||||
bool GetCountryId(storage::CountryInfoGetter const & infoGetter, uint32_t ftype,
|
||||
storage::CountryId & countryId) const;
|
||||
|
||||
bool IsEqualBasic(RankerResult const & r) const;
|
||||
bool IsEqualCommon(RankerResult const & r) const;
|
||||
|
||||
uint32_t GetBestType(std::vector<uint32_t> const * preferredTypes = nullptr) const;
|
||||
|
||||
#ifdef SEARCH_USE_PROVENANCE
|
||||
std::vector<ResultTracer::Branch> const & GetProvenance() const { return m_provenance; }
|
||||
#endif
|
||||
|
||||
friend std::string DebugPrint(RankerResult const & r);
|
||||
|
||||
private:
|
||||
friend class RankerResultMaker;
|
||||
friend class Ranker;
|
||||
|
||||
struct RegionInfo
|
||||
{
|
||||
storage::CountryId m_countryId;
|
||||
m2::PointD m_point;
|
||||
|
||||
void SetParams(storage::CountryId const & countryId, m2::PointD const & point)
|
||||
{
|
||||
m_countryId = countryId;
|
||||
m_point = point;
|
||||
}
|
||||
|
||||
bool GetCountryId(storage::CountryInfoGetter const & infoGetter, storage::CountryId & countryId) const;
|
||||
};
|
||||
|
||||
RegionInfo m_region;
|
||||
feature::TypesHolder m_types;
|
||||
std::string m_str;
|
||||
Result::Details m_details;
|
||||
|
||||
StoredRankingInfo m_info;
|
||||
std::shared_ptr<RankingInfo> m_dbgInfo; // used in debug logs and tests, nullptr in production
|
||||
|
||||
FeatureID m_id;
|
||||
double m_finalRank;
|
||||
|
||||
Type m_resultType;
|
||||
feature::GeomType m_geomType = feature::GeomType::Undefined;
|
||||
|
||||
#ifdef SEARCH_USE_PROVENANCE
|
||||
// The call path in the Geocoder that leads to this result.
|
||||
std::vector<ResultTracer::Branch> m_provenance;
|
||||
#endif
|
||||
};
|
||||
|
||||
void FillDetails(FeatureType & ft, std::string const & name, Result::Details & details);
|
||||
} // namespace search
|
||||
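For illustration only (not part of this commit): the static "Compare functions" above are plain binary predicates, so a batch of candidates can be ordered with any of them directly. A minimal sketch, assuming the libs/search headers from this commit are on the include path and that results holds a pre-ranker batch:

// Illustrative sketch; not part of the diff.
#include "search/intermediate_result.hpp"

#include <algorithm>
#include <vector>

void SortByDistance(std::vector<search::PreRankerResult> & results)
{
  // LessDistance orders candidates by their distance to the search pivot.
  std::sort(results.begin(), results.end(), &search::PreRankerResult::LessDistance);
}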
74
libs/search/intersection_result.cpp
Normal file
@@ -0,0 +1,74 @@
#include "search/intersection_result.hpp"

#include "base/assert.hpp"

#include <sstream>

namespace search
{
// static
uint32_t const IntersectionResult::kInvalidId;

void IntersectionResult::Set(Model::Type type, uint32_t id)
{
  switch (type)
  {
  case Model::TYPE_SUBPOI: m_subpoi = id; break;
  case Model::TYPE_COMPLEX_POI: m_complexPoi = id; break;
  case Model::TYPE_BUILDING: m_building = id; break;
  case Model::TYPE_STREET: m_street = id; break;
  case Model::TYPE_SUBURB: m_suburb = id; break;

  /// @todo Store city (place) name for ranking? I suspect that it should work fine now, without it.
  case Model::TYPE_CITY: break;

  case Model::TYPE_VILLAGE:
  case Model::TYPE_STATE:
  case Model::TYPE_COUNTRY:
  case Model::TYPE_UNCLASSIFIED:
  case Model::TYPE_COUNT: ASSERT(false, ("Unsupported type.")); break;
  }
}

uint32_t IntersectionResult::InnermostResult() const
{
  if (m_subpoi != kInvalidId)
    return m_subpoi;
  if (m_complexPoi != kInvalidId)
    return m_complexPoi;
  if (m_building != kInvalidId)
    return m_building;
  if (m_street != kInvalidId)
    return m_street;
  if (m_suburb != kInvalidId)
    return m_suburb;
  return kInvalidId;
}

void IntersectionResult::Clear()
{
  m_subpoi = kInvalidId;
  m_complexPoi = kInvalidId;
  m_building = kInvalidId;
  m_street = kInvalidId;
  m_suburb = kInvalidId;
}

std::string DebugPrint(IntersectionResult const & result)
{
  std::ostringstream os;
  os << "IntersectionResult [ ";
  if (result.m_subpoi != IntersectionResult::kInvalidId)
    os << "SUBPOI:" << result.m_subpoi << " ";
  if (result.m_complexPoi != IntersectionResult::kInvalidId)
    os << "COMPLEX_POI:" << result.m_complexPoi << " ";
  if (result.m_building != IntersectionResult::kInvalidId)
    os << "BUILDING:" << result.m_building << " ";
  if (result.m_street != IntersectionResult::kInvalidId)
    os << "STREET:" << result.m_street << " ";
  if (result.m_suburb != IntersectionResult::kInvalidId)
    os << "SUBURB:" << result.m_suburb << " ";
  os << "]";
  return os.str();
}
}  // namespace search
42
libs/search/intersection_result.hpp
Normal file
@@ -0,0 +1,42 @@
#pragma once

#include "search/model.hpp"

#include <cstdint>
#include <limits>
#include <string>

namespace search
{
// This class holds higher-level features for an intersection result,
// i.e. BUILDING and STREET for POI or STREET for BUILDING.
struct IntersectionResult
{
  static uint32_t constexpr kInvalidId = std::numeric_limits<uint32_t>::max();

  void Set(Model::Type type, uint32_t id);

  // Returns the first valid feature among the [SUBPOI, COMPLEX_POI, BUILDING, STREET].
  uint32_t InnermostResult() const;

  // Returns true when at least one valid feature exists.
  inline bool IsValid() const { return InnermostResult() != kInvalidId; }

  // Building == Streets means that we have actual street result, but got here
  // via _fake_ TYPE_BUILDING layer (see MatchPOIsAndBuildings).
  inline bool IsFakeBuildingButStreet() const { return m_building != kInvalidId && m_building == m_street; }

  inline bool IsPoiAndComplexPoi() const { return m_complexPoi != kInvalidId && m_subpoi != kInvalidId; }

  // Clears all fields to an invalid state.
  void Clear();

  uint32_t m_subpoi = kInvalidId;
  uint32_t m_complexPoi = kInvalidId;
  uint32_t m_building = kInvalidId;
  uint32_t m_street = kInvalidId;
  uint32_t m_suburb = kInvalidId;
};

std::string DebugPrint(IntersectionResult const & result);
}  // namespace search
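A minimal illustrative sketch of how these slots behave (not part of the commit): the innermost valid layer wins, so a result holding both a building and a street reports the building. Feature ids below are hypothetical.

#include "search/intersection_result.hpp"

#include <cassert>

void IntersectionResultExample()
{
  search::IntersectionResult res;
  res.Set(search::Model::TYPE_STREET, 42 /* hypothetical street feature id */);
  res.Set(search::Model::TYPE_BUILDING, 7 /* hypothetical building feature id */);

  assert(res.IsValid());
  // BUILDING is checked before STREET in InnermostResult(), so 7 is returned.
  assert(res.InnermostResult() == 7);

  res.Clear();
  assert(!res.IsValid());
}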
144
libs/search/interval_set.hpp
Normal file
@@ -0,0 +1,144 @@
#pragma once

#include <algorithm>
#include <set>
#include <utility>
#include <vector>

// todo(@m) Move to search/base?
namespace search
{
// This class represents a set of disjoint intervals in the form
// [begin, end). Note that neighbour intervals are always coalesced,
// so while [0, 1), [1, 2) and [2, 3) are disjoint, after addition to
// the set they will be stored as a single [0, 3).
template <typename Elem>
class IntervalSet
{
public:
  using Interval = std::pair<Elem, Elem>;

  // Adds an |interval| to the set, coalescing adjacent intervals if needed.
  //
  // Complexity: O(num of intervals intersecting with |interval| +
  // log(total number of intervals)).
  void Add(Interval const & interval);

  // Subtracts set from an |interval| and appends result to
  // |difference|.
  //
  // Complexity: O(num of intervals intersecting with |interval| +
  // log(total number of intervals)).
  void SubtractFrom(Interval const & interval, std::vector<Interval> & difference) const;

  // Returns all elements of the set as a set of intervals.
  //
  // Complexity: O(1).
  inline std::set<Interval> const & Elems() const { return m_intervals; }

private:
  using Iterator = typename std::set<Interval>::iterator;

  // Calculates range of intervals that have non-empty intersection with a given |interval|.
  void Cover(Interval const & interval, Iterator & begin, Iterator & end) const;

  // This is a set of disjoint intervals.
  std::set<Interval> m_intervals;
};

template <typename Elem>
void IntervalSet<Elem>::Add(Interval const & interval)
{
  // Skips empty intervals.
  if (interval.first == interval.second)
    return;

  Iterator begin;
  Iterator end;
  Cover(interval, begin, end);

  Elem from = interval.first;
  Elem to = interval.second;

  // Updates |from| and |to| in accordance with corner intervals (if any).
  if (begin != end)
  {
    if (begin->first < from)
      from = begin->first;

    auto last = end;
    --last;
    if (last->second > to)
      to = last->second;
  }

  // Now all elements [from, to) can be added to the set as a single
  // interval which will replace all intervals in [begin, end). But
  // note that it can be possible to merge new interval with its
  // neighbors, so following code checks it.
  if (begin != m_intervals.begin())
  {
    auto prevBegin = begin;
    --prevBegin;
    if (prevBegin->second == from)
    {
      begin = prevBegin;
      from = prevBegin->first;
    }
  }
  if (end != m_intervals.end() && end->first == to)
  {
    to = end->second;
    ++end;
  }

  m_intervals.erase(begin, end);
  m_intervals.emplace(from, to);
}

template <typename Elem>
void IntervalSet<Elem>::SubtractFrom(Interval const & interval, std::vector<Interval> & difference) const
{
  Iterator begin;
  Iterator end;

  Cover(interval, begin, end);

  Elem from = interval.first;
  Elem const to = interval.second;

  for (auto it = begin; it != end && from < to; ++it)
  {
    if (it->first > from)
    {
      difference.emplace_back(from, it->first);
      from = it->second;
    }
    else
    {
      from = std::max(from, it->second);
    }
  }

  if (from < to)
    difference.emplace_back(from, to);
}

template <typename Elem>
void IntervalSet<Elem>::Cover(Interval const & interval, Iterator & begin, Iterator & end) const
{
  Elem const & from = interval.first;
  Elem const & to = interval.second;

  begin = m_intervals.lower_bound(std::make_pair(from, from));
  if (begin != m_intervals.begin())
  {
    auto prev = begin;
    --prev;
    if (prev->second > from)
      begin = prev;
  }

  end = m_intervals.lower_bound(std::make_pair(to, to));
}
}  // namespace search
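To make the coalescing and subtraction semantics described in the header comments concrete, here is a small illustrative sketch (not part of the commit):

#include "search/interval_set.hpp"

#include <cassert>

void IntervalSetExample()
{
  search::IntervalSet<int> set;
  set.Add({0, 1});
  set.Add({2, 3});
  set.Add({1, 2});
  // The three touching intervals are coalesced into a single [0, 3).
  assert(set.Elems().size() == 1);

  std::vector<std::pair<int, int>> diff;
  set.SubtractFrom({-2, 5}, diff);
  // What is left of [-2, 5) after removing [0, 3): [-2, 0) and [3, 5).
  assert(diff.size() == 2);
}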
89
libs/search/keyword_lang_matcher.cpp
Normal file
@@ -0,0 +1,89 @@
#include "keyword_lang_matcher.hpp"

#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"

#include "base/assert.hpp"
#include "base/stl_helpers.hpp"

#include <algorithm>
#include <limits>
#include <sstream>

using namespace std;

namespace search
{
// KeywordLangMatcher::Score ----------------------------------------------------------------------
KeywordLangMatcher::Score::Score() : m_langScore(numeric_limits<int>::min()) {}

KeywordLangMatcher::Score::Score(KeywordMatcher::Score const & score, int langScore)
  : m_parentScore(score)
  , m_langScore(langScore)
{}

bool KeywordLangMatcher::Score::operator<(KeywordLangMatcher::Score const & score) const
{
  if (m_parentScore != score.m_parentScore)
    return m_parentScore < score.m_parentScore;

  if (m_langScore != score.m_langScore)
    return m_langScore < score.m_langScore;

  return m_parentScore.LessInTokensLength(score.m_parentScore);
}

bool KeywordLangMatcher::Score::operator<=(KeywordLangMatcher::Score const & score) const
{
  return !(score < *this);
}
// KeywordLangMatcher ------------------------------------------------------------------------------
KeywordLangMatcher::KeywordLangMatcher(size_t maxLanguageTiers) : m_languagePriorities(maxLanguageTiers)
{
  // Should we ever have this many tiers, the idea of storing a vector of vectors must be revised.
  ASSERT_LESS(maxLanguageTiers, 10, ());
}

void KeywordLangMatcher::SetLanguages(size_t tier, std::vector<int8_t> && languages)
{
  ASSERT_LESS(tier, m_languagePriorities.size(), ());
  m_languagePriorities[tier] = std::move(languages);
}

int KeywordLangMatcher::CalcLangScore(int8_t lang) const
{
  int const numTiers = static_cast<int>(m_languagePriorities.size());
  for (int i = 0; i < numTiers; ++i)
  {
    for (int8_t x : m_languagePriorities[i])
      if (x == lang)
        return -i;
  }

  return -numTiers;
}

KeywordLangMatcher::Score KeywordLangMatcher::CalcScore(int8_t lang, string_view name) const
{
  return Score(m_keywordMatcher.CalcScore(name), CalcLangScore(lang));
}

KeywordLangMatcher::Score KeywordLangMatcher::CalcScore(int8_t lang, strings::UniString const & name) const
{
  return Score(m_keywordMatcher.CalcScore(name), CalcLangScore(lang));
}

KeywordLangMatcher::Score KeywordLangMatcher::CalcScore(int8_t lang, strings::UniString const * tokens,
                                                        size_t count) const
{
  return Score(m_keywordMatcher.CalcScore(tokens, count), CalcLangScore(lang));
}

// Functions ---------------------------------------------------------------------------------------
string DebugPrint(KeywordLangMatcher::Score const & score)
{
  ostringstream ss;
  ss << "KLM::Score(" << DebugPrint(score.m_parentScore) << ", LS=" << score.m_langScore << ")";
  return ss.str();
}
}  // namespace search
64
libs/search/keyword_lang_matcher.hpp
Normal file
@@ -0,0 +1,64 @@
#pragma once

#include "search/keyword_matcher.hpp"

#include "base/string_utils.hpp"

#include <string>
#include <vector>

namespace search
{
class KeywordLangMatcher
{
public:
  class Score
  {
  public:
    Score();
    bool operator<(Score const & s) const;
    bool operator<=(Score const & s) const;

  private:
    friend class KeywordLangMatcher;
    friend std::string DebugPrint(Score const & score);

    Score(KeywordMatcher::Score const & score, int langScore);

    KeywordMatcher::Score m_parentScore;
    int m_langScore;
  };

  // Constructs a matcher that supports up to |maxLanguageTiers| tiers.
  // All languages in the same tier are considered equal.
  // The lower the tier is, the more important the languages in it are.
  explicit KeywordLangMatcher(size_t maxLanguageTiers);

  // Defines the languages in the |tier| to be exactly |languages|.
  void SetLanguages(size_t const tier, std::vector<int8_t> && languages);

  // Calls |fn| on every language in every tier. Does not make a distinction
  // between languages in different tiers.
  template <typename Fn>
  void ForEachLanguage(Fn && fn) const
  {
    for (auto const & langs : m_languagePriorities)
      for (int8_t lang : langs)
        fn(lang);
  }

  // Store references to keywords from source array of strings.
  inline void SetKeywords(QueryString const & query) { m_keywordMatcher.SetKeywords(query); }

  // Returns the Score of the name (greater is better).
  Score CalcScore(int8_t lang, std::string_view name) const;
  Score CalcScore(int8_t lang, strings::UniString const & name) const;
  Score CalcScore(int8_t lang, strings::UniString const * tokens, size_t count) const;

private:
  int CalcLangScore(int8_t lang) const;

  std::vector<std::vector<int8_t>> m_languagePriorities;
  KeywordMatcher m_keywordMatcher;
};
}  // namespace search
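A sketch of how the tier machinery above is meant to be driven (illustrative only; the int8_t language codes below are placeholders, real callers obtain them from the multilanguage string utilities, and the QueryString is filled by the search pipeline):

#include "search/keyword_lang_matcher.hpp"

void KeywordLangMatcherExample(search::QueryString const & query)
{
  // Two tiers: tier 0 (most important) and tier 1.
  search::KeywordLangMatcher matcher(2 /* maxLanguageTiers */);
  matcher.SetKeywords(query);

  int8_t const kUserLang = 1;  // placeholder language code
  int8_t const kEnglish = 0;   // placeholder language code
  matcher.SetLanguages(0 /* tier */, {kUserLang});
  matcher.SetLanguages(1 /* tier */, {kEnglish});

  // For identical names, a tier-0 language wins over a tier-1 language,
  // because CalcLangScore() returns -tier (and -numTiers for unlisted languages).
  auto const s0 = matcher.CalcScore(kUserLang, std::string_view("Main Street"));
  auto const s1 = matcher.CalcScore(kEnglish, std::string_view("Main Street"));
  bool const tier0Wins = s1 < s0;
  (void)tier0Wins;
}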
166
libs/search/keyword_matcher.cpp
Normal file
@@ -0,0 +1,166 @@
#include "search/keyword_matcher.hpp"

#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"

#include "base/assert.hpp"
#include "base/buffer_vector.hpp"
#include "base/stl_helpers.hpp"

#include <algorithm>
#include <sstream>

namespace search
{
using namespace std;

KeywordMatcher::KeywordMatcher()
{
  Clear();
}

void KeywordMatcher::Clear()
{
  m_keywords.clear();
  m_prefix.clear();
}

void KeywordMatcher::SetKeywords(QueryString const & query)
{
  m_keywords.assign(query.m_tokens.begin(), query.m_tokens.end());
  m_prefix = query.m_prefix;
}

KeywordMatcher::Score KeywordMatcher::CalcScore(string_view name) const
{
  return CalcScore(NormalizeAndSimplifyString(name));
}

KeywordMatcher::Score KeywordMatcher::CalcScore(strings::UniString const & name) const
{
  buffer_vector<strings::UniString, kMaxNumTokens> tokens;
  SplitUniString(name, base::MakeBackInsertFunctor(tokens), Delimiters());

  return CalcScore(tokens.data(), tokens.size());
}

KeywordMatcher::Score KeywordMatcher::CalcScore(strings::UniString const * tokens, size_t count) const
{
  // Some names can have too many tokens. Trim them.
  count = min(count, kMaxNumTokens);

  vector<bool> isQueryTokenMatched(m_keywords.size());
  vector<bool> isNameTokenMatched(count);
  uint32_t sumTokenMatchDistance = 0;
  int8_t prevTokenMatchDistance = 0;
  bool prefixMatched = true;

  for (size_t i = 0; i < m_keywords.size(); ++i)
  {
    for (size_t j = 0; j < count && !isQueryTokenMatched[i]; ++j)
    {
      if (!isNameTokenMatched[j] && m_keywords[i] == tokens[j])
      {
        isQueryTokenMatched[i] = isNameTokenMatched[j] = true;
        int8_t const tokenMatchDistance = i - j;
        sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance);
        prevTokenMatchDistance = tokenMatchDistance;
      }
    }
  }

  if (!m_prefix.empty())
  {
    prefixMatched = false;
    for (size_t j = 0; j < count && !prefixMatched; ++j)
    {
      if (!isNameTokenMatched[j] &&
          strings::StartsWith(tokens[j].begin(), tokens[j].end(), m_prefix.begin(), m_prefix.end()))
      {
        isNameTokenMatched[j] = prefixMatched = true;
        int8_t const tokenMatchDistance = int(m_keywords.size()) - j;
        sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance);
      }
    }
  }

  uint8_t numQueryTokensMatched = 0;
  for (size_t i = 0; i < isQueryTokenMatched.size(); ++i)
    if (isQueryTokenMatched[i])
      ++numQueryTokensMatched;

  Score score;
  score.m_fullQueryMatched = prefixMatched && (numQueryTokensMatched == isQueryTokenMatched.size());
  score.m_prefixMatched = prefixMatched;
  score.m_numQueryTokensAndPrefixMatched = numQueryTokensMatched + (prefixMatched ? 1 : 0);

  score.m_nameTokensMatched = 0;
  score.m_nameTokensLength = 0;
  for (size_t i = 0; i < count; ++i)
  {
    if (isNameTokenMatched[i])
      score.m_nameTokensMatched |= (1 << (kMaxNumTokens - 1 - i));
    score.m_nameTokensLength += tokens[i].size();
  }

  score.m_sumTokenMatchDistance = sumTokenMatchDistance;
  return score;
}

KeywordMatcher::Score::Score()
  : m_sumTokenMatchDistance(0)
  , m_nameTokensMatched(0)
  , m_nameTokensLength(0)
  , m_numQueryTokensAndPrefixMatched(0)
  , m_fullQueryMatched(false)
  , m_prefixMatched(false)
{}

bool KeywordMatcher::Score::operator<(KeywordMatcher::Score const & s) const
{
  if (m_fullQueryMatched != s.m_fullQueryMatched)
    return m_fullQueryMatched < s.m_fullQueryMatched;
  if (m_numQueryTokensAndPrefixMatched != s.m_numQueryTokensAndPrefixMatched)
    return m_numQueryTokensAndPrefixMatched < s.m_numQueryTokensAndPrefixMatched;
  if (m_prefixMatched != s.m_prefixMatched)
    return m_prefixMatched < s.m_prefixMatched;
  if (m_nameTokensMatched != s.m_nameTokensMatched)
    return m_nameTokensMatched < s.m_nameTokensMatched;
  if (m_sumTokenMatchDistance != s.m_sumTokenMatchDistance)
    return m_sumTokenMatchDistance > s.m_sumTokenMatchDistance;

  return false;
}

bool KeywordMatcher::Score::operator==(KeywordMatcher::Score const & s) const
{
  return m_sumTokenMatchDistance == s.m_sumTokenMatchDistance && m_nameTokensMatched == s.m_nameTokensMatched &&
         m_numQueryTokensAndPrefixMatched == s.m_numQueryTokensAndPrefixMatched &&
         m_fullQueryMatched == s.m_fullQueryMatched && m_prefixMatched == s.m_prefixMatched;
}

bool KeywordMatcher::Score::LessInTokensLength(Score const & s) const
{
  if (m_fullQueryMatched)
  {
    ASSERT(s.m_fullQueryMatched, ());
    return m_nameTokensLength > s.m_nameTokensLength;
  }
  return false;
}

string DebugPrint(KeywordMatcher::Score const & score)
{
  ostringstream out;
  out << "KeywordMatcher::Score(";
  out << "FQM=" << score.m_fullQueryMatched;
  out << ",nQTM=" << static_cast<int>(score.m_numQueryTokensAndPrefixMatched);
  out << ",PM=" << score.m_prefixMatched;
  out << ",NTM=";
  for (int i = static_cast<int>(kMaxNumTokens) - 1; i >= 0; --i)
    out << ((score.m_nameTokensMatched >> i) & 1);
  out << ",STMD=" << score.m_sumTokenMatchDistance;
  out << ")";
  return out.str();
}
}  // namespace search
60
libs/search/keyword_matcher.hpp
Normal file
@@ -0,0 +1,60 @@
#pragma once

#include "search/common.hpp"

#include "base/string_utils.hpp"

#include <string>
#include <vector>

namespace search
{
class KeywordMatcher
{
public:
  class Score
  {
  public:
    Score();

    // *NOTE* m_nameTokensLength is usually used as a late stage tiebreaker
    // and does not take part in the operators.
    bool operator<(Score const & s) const;
    bool operator==(Score const & s) const;
    bool operator!=(Score const & s) const { return !(*this == s); }

    bool LessInTokensLength(Score const & s) const;

    bool IsQueryMatched() const { return m_fullQueryMatched; }

  private:
    friend class KeywordMatcher;
    friend std::string DebugPrint(Score const & score);

    uint32_t m_sumTokenMatchDistance;
    uint32_t m_nameTokensMatched;
    uint32_t m_nameTokensLength;
    uint8_t m_numQueryTokensAndPrefixMatched;
    bool m_fullQueryMatched : 1;
    bool m_prefixMatched : 1;
  };

  KeywordMatcher();

  void Clear();

  /// Internal copy of keywords is made.
  void SetKeywords(QueryString const & query);

  /// @return Score of the name (greater is better).
  //@{
  Score CalcScore(std::string_view name) const;
  Score CalcScore(strings::UniString const & name) const;
  Score CalcScore(strings::UniString const * tokens, size_t count) const;
  //@}

private:
  std::vector<strings::UniString> m_keywords;
  strings::UniString m_prefix;
};
}  // namespace search
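A small illustrative sketch of the scoring entry points above (not part of the commit); since greater Score is better, two candidate names can be compared against the current query like this:

#include "search/keyword_matcher.hpp"

// Returns true when |name1| matches the matcher's query tokens better than |name2|.
bool FirstNameIsBetter(search::KeywordMatcher const & matcher, std::string_view name1, std::string_view name2)
{
  auto const s1 = matcher.CalcScore(name1);
  auto const s2 = matcher.CalcScore(name2);
  return s2 < s1;
}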
298
libs/search/latlon_match.cpp
Normal file
@@ -0,0 +1,298 @@
#include "search/latlon_match.hpp"

#include "base/macros.hpp"

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <string>
#include <utility>

using namespace std;

namespace
{
string const kSpaces = " \t";
string const kCharsToSkip = " \n\t,;:.()";
string const kDecimalMarks = ".,";

bool IsDecimalMark(char c)
{
  return kDecimalMarks.find(c) != string::npos;
}

bool IsNegativeSymbol(char c)
{
  return c == '-';
}

template <typename Char>
void SkipSpaces(Char *& s)
{
  while (kSpaces.find(*s) != string::npos)
    ++s;
}

template <typename Char>
void Skip(Char *& s)
{
  while (kCharsToSkip.find(*s) != string::npos)
    ++s;
}

bool MatchDMSArray(char const *& s, char const * arr[], size_t count)
{
  for (size_t i = 0; i < count; ++i)
  {
    size_t const len = strlen(arr[i]);
    if (strncmp(s, arr[i], len) == 0)
    {
      s += len;
      return true;
    }
  }
  return false;
}

int GetDMSIndex(char const *& s)
{
  char const * arrDegree[] = {"*", "°"};
  char const * arrMinutes[] = {"\'", "’", "′"};
  char const * arrSeconds[] = {"\"", "”", "″", "\'\'", "’’", "′′"};

  if (MatchDMSArray(s, arrDegree, ARRAY_SIZE(arrDegree)))
    return 0;
  if (MatchDMSArray(s, arrSeconds, ARRAY_SIZE(arrSeconds)))
    return 2;
  if (MatchDMSArray(s, arrMinutes, ARRAY_SIZE(arrMinutes)))
    return 1;

  return -1;
}

bool SkipNSEW(char const *& s, char const * (&arrPos)[4])
{
  Skip(s);

  int ind;
  switch (*s)
  {
  case 'N':
  case 'n': ind = 0; break;
  case 'S':
  case 's': ind = 1; break;
  case 'E':
  case 'e': ind = 2; break;
  case 'W':
  case 'w': ind = 3; break;
  default: return false;
  }

  arrPos[ind] = s++;
  return true;
}

// Attempts to read a double from the start of |str|
// in one of what we assume are two most common forms
// for lat/lon: decimal digits separated either
// by a dot or by a comma, with digits on both sides
// of the separator.
// If the attempt fails, falls back to std::strtod.
double EatDouble(char const * str, char ** strEnd)
{
  bool gotDigitBeforeMark = false;
  bool gotMark = false;
  bool gotDigitAfterMark = false;
  char const * markPos = nullptr;
  char const * p = str;
  double modifier = 1.0;
  while (true)
  {
    if (IsDecimalMark(*p))
    {
      if (gotMark)
        break;
      gotMark = true;
      markPos = p;
    }
    else if (isdigit(*p))
    {
      if (gotMark)
        gotDigitAfterMark = true;
      else
        gotDigitBeforeMark = true;
    }
    else if (IsNegativeSymbol(*p))
    {
      modifier = -1.0;
    }
    else
    {
      break;
    }
    ++p;
  }

  if (gotDigitBeforeMark && gotMark && gotDigitAfterMark)
  {
    string const part1(str, markPos);
    string const part2(markPos + 1, p);
    *strEnd = const_cast<char *>(p);
    auto const x1 = atof(part1.c_str());
    auto const x2 = atof(part2.c_str());
    return x1 + x2 * modifier * pow(10.0, -static_cast<double>(part2.size()));
  }

  return strtod(str, strEnd);
}
}  // namespace

namespace search
{
bool MatchLatLonDegree(string const & query, double & lat, double & lon)
{
  // should be default initialization (0, false)
  array<pair<double, bool>, 6> v;

  int base = 0;

  // Positions of N, S, E, W symbols
  char const * arrPos[] = {nullptr, nullptr, nullptr, nullptr};
  bool arrDegreeSymbol[] = {false, false};

  char const * const startQuery = query.c_str();
  char const * s = startQuery;
  while (true)
  {
    char const * s1 = s;
    char const * s11 = s;
    if (SkipNSEW(s, arrPos))
    {
      s11 = s;
      Skip(s);
    }
    else
      SkipSpaces(s);

    if (!*s)
    {
      // End of the string - check matching.
      break;
    }

    char * s2;
    double const x = EatDouble(s, &s2);
    if (s == s2)
    {
      // invalid token
      if (s == s11)
      {
        // Return error if there are no any delimiters.
        return false;
      }
      else
      {
        // Check matching if token is delimited.
        break;
      }
    }
    else if (x < 0 && s == s1 && !(s == startQuery || kSpaces.find(*(s - 1)) != string::npos))
    {
      // Skip input like "3-8"
      return false;
    }

    s = s2;
    SkipSpaces(s);

    int i = GetDMSIndex(s);
    bool degreeSymbol = true;
    if (i == -1)
    {
      // try to assign next possible value mark
      if (arrDegreeSymbol[base / 3])
      {
        if (!v[base + 1].second)
          i = 1;
        else
          i = 2;
      }
      else
      {
        i = 0;
        degreeSymbol = false;
      }
    }

    if (i == 0)  // degrees
    {
      if (v[base].second)
      {
        if (base == 0)
        {
          base += 3;
        }
        else
        {
          // too many degree values
          return false;
        }
      }
      arrDegreeSymbol[base / 3] = degreeSymbol;
    }
    else  // minutes or seconds
    if (x < 0.0 || x > 60.0 ||            // minutes or seconds should be in [0, 60] range
        v[base + i].second ||             // value already exists
        !v[base].second ||                // no degrees found for value
        (i == 2 && !v[base + 1].second))  // no minutes for seconds
    {
      return false;
    }

    v[base + i].first = x;
    v[base + i].second = true;
  }

  if (!v[0].second || !v[3].second)
  {
    // degree should exist for both coordinates
    return false;
  }

  if ((arrPos[0] && arrPos[1]) || (arrPos[2] && arrPos[3]))
  {
    // control symbols should match only once
    return false;
  }

  // Calculate Lat, Lon with correct sign.
  lat = fabs(v[0].first) + v[1].first / 60.0 + v[2].first / 3600.0;
  if (v[0].first < 0.0)
    lat = -lat;

  lon = fabs(v[3].first) + v[4].first / 60.0 + v[5].first / 3600.0;
  if (v[3].first < 0.0)
    lon = -lon;

  if (max(arrPos[0], arrPos[1]) > max(arrPos[2], arrPos[3]))
    swap(lat, lon);

  if (arrPos[1] != nullptr)
    lat = -lat;
  if (arrPos[3] != nullptr)
    lon = -lon;

  // Valid input ranges for longitude are: [0, 360] or [-180, 180].
  // We normalize it to [-180, 180].
  if (lon < -180.0 || lon > 360.0)
    return false;

  if (lon > 180.0)
    lon -= 360.0;

  return fabs(lat) <= 90.0;
}
}  // namespace search
9
libs/search/latlon_match.hpp
Normal file
@@ -0,0 +1,9 @@
#pragma once

#include <string>

namespace search
{
// Parses input query for most input coordinates cases.
bool MatchLatLonDegree(std::string const & query, double & lat, double & lon);
}  // namespace search
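An illustrative sketch of the parser above (not part of the commit), showing the two most common inputs it handles: plain decimal degrees and degree/minute/second notation with hemisphere letters, where W and S flip the sign of the parsed value:

#include "search/latlon_match.hpp"

#include <cassert>

void LatLonMatchExample()
{
  double lat, lon;

  // Plain decimal degrees, space- or comma-separated.
  assert(search::MatchLatLonDegree("55.7558 37.6173", lat, lon));

  // DMS with hemisphere letters; the trailing W makes the longitude negative.
  assert(search::MatchLatLonDegree("40°42′46″N 74°00′22″W", lat, lon));
  assert(lon < 0.0);
}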
56
libs/search/lazy_centers_table.cpp
Normal file
@@ -0,0 +1,56 @@
#include "search/lazy_centers_table.hpp"

#include "indexer/mwm_set.hpp"

#include "platform/mwm_traits.hpp"

#include "defines.hpp"

namespace search
{
LazyCentersTable::LazyCentersTable(MwmValue const & value)
  : m_value(value)
  , m_state(STATE_NOT_LOADED)
  , m_reader(std::unique_ptr<ModelReader>())
{}

void LazyCentersTable::EnsureTableLoaded()
{
  if (m_state != STATE_NOT_LOADED)
    return;

  try
  {
    m_reader = m_value.m_cont.GetReader(CENTERS_FILE_TAG);
  }
  catch (RootException const & ex)
  {
    LOG(LERROR, ("Unable to load", CENTERS_FILE_TAG, ex.Msg()));
    m_state = STATE_FAILED;
    return;
  }

  version::MwmTraits traits(m_value.GetMwmVersion());
  auto const format = traits.GetCentersTableFormat();

  if (format == version::MwmTraits::CentersTableFormat::PlainEliasFanoMap)
    m_table = CentersTable::LoadV0(*m_reader.GetPtr(), m_value.GetHeader().GetDefGeometryCodingParams());
  else if (format == version::MwmTraits::CentersTableFormat::EliasFanoMapWithHeader)
    m_table = CentersTable::LoadV1(*m_reader.GetPtr());
  else
    CHECK(false, ("Unknown centers table format."));

  if (m_table)
    m_state = STATE_LOADED;
  else
    m_state = STATE_FAILED;
}

bool LazyCentersTable::Get(uint32_t id, m2::PointD & center)
{
  EnsureTableLoaded();
  if (m_state != STATE_LOADED)
    return false;
  return m_table->Get(id, center);
}
}  // namespace search
41
libs/search/lazy_centers_table.hpp
Normal file
@@ -0,0 +1,41 @@
#pragma once

#include "indexer/centers_table.hpp"

#include "coding/files_container.hpp"

#include "geometry/point2d.hpp"

#include <cstdint>
#include <memory>

class MwmValue;

namespace search
{
class LazyCentersTable
{
public:
  enum State
  {
    STATE_NOT_LOADED,
    STATE_LOADED,
    STATE_FAILED
  };

  explicit LazyCentersTable(MwmValue const & value);

  inline State GetState() const { return m_state; }

  void EnsureTableLoaded();

  [[nodiscard]] bool Get(uint32_t id, m2::PointD & center);

private:
  MwmValue const & m_value;
  State m_state;

  FilesContainerR::TReader m_reader;
  std::unique_ptr<CentersTable> m_table;
};
}  // namespace search
17
libs/search/localities_source.cpp
Normal file
@@ -0,0 +1,17 @@
#include "search/localities_source.hpp"

#include "indexer/classificator.hpp"

namespace search
{
LocalitiesSource::LocalitiesSource()
{
  auto & c = classif();

  auto const city = c.GetTypeByPath({"place", "city"});
  c.ForEachInSubtree([this](uint32_t c) { m_cities.push_back(c); }, city);

  auto const town = c.GetTypeByPath({"place", "town"});
  c.ForEachInSubtree([this](uint32_t t) { m_towns.push_back(t); }, town);
}
}  // namespace search
24
libs/search/localities_source.hpp
Normal file
@@ -0,0 +1,24 @@
#pragma once

#include <cstdint>
#include <vector>

namespace search
{
struct LocalitiesSource
{
  LocalitiesSource();

  template <typename Fn>
  void ForEachType(Fn && fn) const
  {
    for (auto const c : m_cities)
      fn(c);
    for (auto const t : m_towns)
      fn(t);
  }

  std::vector<uint32_t> m_cities;
  std::vector<uint32_t> m_towns;
};
}  // namespace search
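For illustration only (not part of the commit, and assuming the classificator has been initialized, as it is inside the application), the collected place types can be enumerated like this:

#include "search/localities_source.hpp"

#include <cstddef>

size_t CountLocalityTypes()
{
  // The constructor walks the classificator subtrees of place=city and place=town.
  search::LocalitiesSource const source;
  size_t count = 0;
  source.ForEachType([&count](uint32_t /* type */) { ++count; });
  return count;
}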
313
libs/search/locality_finder.cpp
Normal file
@@ -0,0 +1,313 @@
#include "search/locality_finder.hpp"

#include "search/categories_cache.hpp"
#include "search/cbv.hpp"
#include "search/dummy_rank_table.hpp"
#include "search/mwm_context.hpp"

#include "indexer/data_source.hpp"
#include "indexer/feature_visibility.hpp"
#include "indexer/ftypes_matcher.hpp"

#include "base/assert.hpp"
#include "base/stl_helpers.hpp"

#include <vector>

namespace search
{
using namespace std;

namespace
{
double constexpr kMaxCityRadiusMeters = 30000.0;
double constexpr kMaxVillageRadiusMeters = 2000.0;

struct Filter
{
public:
  virtual ~Filter() = default;
  virtual bool IsGood(uint32_t id) const = 0;
};

class CityFilter : public Filter
{
public:
  explicit CityFilter(RankTable const & ranks) : m_ranks(ranks) {}

  // Filter overrides:
  bool IsGood(uint32_t id) const override { return m_ranks.Get(id) != 0; }

private:
  RankTable const & m_ranks;
};

class VillageFilter : public Filter
{
public:
  VillageFilter(MwmContext const & ctx, VillagesCache & villages) : m_cbv(villages.Get(ctx)) {}

  // Filter overrides:
  bool IsGood(uint32_t id) const override { return m_cbv.HasBit(id); }

private:
  CBV m_cbv;
};

class LocalitiesLoader
{
public:
  LocalitiesLoader(MwmContext const & ctx, CitiesBoundariesTable const & boundaries, Filter const & filter,
                   LocalityFinder::Holder & holder, map<MwmSet::MwmId, unordered_set<uint32_t>> & loadedIds)
    : m_ctx(ctx)
    , m_boundaries(boundaries)
    , m_filter(filter)
    , m_holder(holder)
    , m_loadedIds(loadedIds[m_ctx.GetId()])
  {}

  void operator()(uint32_t id) const
  {
    if (!m_filter.IsGood(id))
      return;

    if (m_loadedIds.count(id) != 0)
      return;

    auto ft = m_ctx.GetFeature(id);
    if (!ft)
      return;

    if (ft->GetGeomType() != feature::GeomType::Point)
      return;

    using namespace ftypes;
    switch (IsLocalityChecker::Instance().GetType(*ft))
    {
    case LocalityType::City:
    case LocalityType::Town:
    case LocalityType::Village: break;
    default: return;
    }

    auto const population = ftypes::GetPopulation(*ft);
    if (population == 0)
      return;

    auto const & names = ft->GetNames();
    auto const center = ft->GetCenter();

    CitiesBoundariesTable::Boundaries boundaries;
    auto const fid = ft->GetID();
    m_boundaries.Get(fid, boundaries);

    m_holder.Add(LocalityItem(names, center, std::move(boundaries), population, fid));
    m_loadedIds.insert(id);
  }

private:
  MwmContext const & m_ctx;
  CitiesBoundariesTable const & m_boundaries;
  Filter const & m_filter;

  LocalityFinder::Holder & m_holder;
  unordered_set<uint32_t> & m_loadedIds;
};

int GetVillagesScale()
{
  auto currentVillagesMinDrawableScale = 0;
  ftypes::IsVillageChecker::Instance().ForEachType([&currentVillagesMinDrawableScale](uint32_t type)
  {
    feature::TypesHolder th;
    th.Assign(type);
    currentVillagesMinDrawableScale = max(currentVillagesMinDrawableScale, GetMinDrawableScaleClassifOnly(th));
  });

  // Needed for backward compatibility. |kCompatibilityVillagesMinDrawableScale| should be set to
  // maximal value we have in mwms over all data versions.
  int constexpr kCompatibilityVillagesMinDrawableScale = 13;
  ASSERT_LESS_OR_EQUAL(currentVillagesMinDrawableScale, kCompatibilityVillagesMinDrawableScale,
                       ("Set kCompatibilityVillagesMinDrawableScale to", currentVillagesMinDrawableScale));
  return max(currentVillagesMinDrawableScale, kCompatibilityVillagesMinDrawableScale);
}
}  // namespace

// LocalityItem ------------------------------------------------------------------------------------
LocalityItem::LocalityItem(StringUtf8Multilang const & names, m2::PointD const & center, Boundaries && boundaries,
                           uint64_t population, FeatureID const & id)
  : m_names(names)
  , m_center(center)
  , m_boundaries(std::move(boundaries))
  , m_population(population)
  , m_id(id)
{}

string DebugPrint(LocalityItem const & item)
{
  stringstream os;
  os << "Names = " << DebugPrint(item.m_names) << ", ";
  os << "Center = " << DebugPrint(item.m_center) << ", ";
  os << "Population = " << item.m_population << ", ";
  os << "Boundaries = " << DebugPrint(item.m_boundaries);
  return os.str();
}

// LocalitySelector --------------------------------------------------------------------------------
LocalitySelector::LocalitySelector(m2::PointD const & p) : m_p(p) {}

void LocalitySelector::operator()(LocalityItem const & item)
{
  auto const inside = item.m_boundaries.HasPoint(m_p);

  // TODO (@y, @m): replace this naive score by p-values on
  // multivariate Gaussian.
  double const distance = mercator::DistanceOnEarth(item.m_center, m_p);

  // GetPopulationByRadius may return 0.
  double const score = (ftypes::GetPopulationByRadius(distance) + 1) / static_cast<double>(item.m_population);

  if (!inside && m_inside)
    return;

  ASSERT(inside || !m_inside, ());

  if ((inside && !m_inside) || (score < m_score))
  {
    m_inside = inside;
    m_score = score;
    m_locality = &item;
  }
}

// LocalityFinder::Holder --------------------------------------------------------------------------
LocalityFinder::Holder::Holder(double radiusMeters) : m_radiusMeters(radiusMeters) {}

bool LocalityFinder::Holder::IsCovered(m2::RectD const & rect) const
{
  bool covered = false;
  m_coverage.ForEachInRect(rect, [&covered](bool) { covered = true; });
  return covered;
}

void LocalityFinder::Holder::SetCovered(m2::PointD const & p)
{
  m_coverage.Add(true, m2::RectD(p, p));
}

void LocalityFinder::Holder::Add(LocalityItem const & item)
{
  m_localities.Add(item, m2::RectD(item.m_center, item.m_center));
}

void LocalityFinder::Holder::ForEachInVicinity(m2::RectD const & rect, LocalitySelector & selector) const
{
  m_localities.ForEachInRect(rect, selector);
}

m2::RectD LocalityFinder::Holder::GetRect(m2::PointD const & p) const
{
  return mercator::RectByCenterXYAndSizeInMeters(p, m_radiusMeters);
}

m2::RectD LocalityFinder::Holder::GetDRect(m2::PointD const & p) const
{
  return mercator::RectByCenterXYAndSizeInMeters(p, 2 * m_radiusMeters);
}

void LocalityFinder::Holder::Clear()
{
  m_coverage.Clear();
  m_localities.Clear();
}

// LocalityFinder ----------------------------------------------------------------------------------
LocalityFinder::LocalityFinder(DataSource const & dataSource, CitiesBoundariesTable const & boundariesTable,
                               VillagesCache & villagesCache)
  : m_dataSource(dataSource)
  , m_boundariesTable(boundariesTable)
  , m_villagesCache(villagesCache)
  , m_cities(kMaxCityRadiusMeters)
  , m_villages(kMaxVillageRadiusMeters)
  , m_mapsLoaded(false)
{}

void LocalityFinder::ClearCache()
{
  m_ranks.reset();
  m_cities.Clear();
  m_villages.Clear();

  m_maps.Clear();
  m_worldId.Reset();
  m_mapsLoaded = false;

  m_loadedIds.clear();
}

void LocalityFinder::LoadVicinity(m2::PointD const & p, bool loadCities, bool loadVillages)
{
  UpdateMaps();

  if (loadCities)
  {
    m2::RectD const crect = m_cities.GetDRect(p);
    auto handle = m_dataSource.GetMwmHandleById(m_worldId);
    if (handle.IsAlive())
    {
      auto const & value = *handle.GetValue();
      if (!m_ranks)
        m_ranks = RankTable::Load(value.m_cont, SEARCH_RANKS_FILE_TAG);
      if (!m_ranks)
        m_ranks = make_unique<DummyRankTable>();

      MwmContext ctx(std::move(handle));
      ctx.ForEachIndex(crect, LocalitiesLoader(ctx, m_boundariesTable, CityFilter(*m_ranks), m_cities, m_loadedIds));
    }

    m_cities.SetCovered(p);
  }

  if (loadVillages)
  {
    m2::RectD const vrect = m_villages.GetDRect(p);
    m_maps.ForEachInRect(m2::RectD(p, p), [&](MwmSet::MwmId const & id)
    {
      auto handle = m_dataSource.GetMwmHandleById(id);
      if (!handle.IsAlive())
        return;

      static int const scale = GetVillagesScale();
      MwmContext ctx(std::move(handle));
      ctx.ForEachIndex(
          vrect, scale,
          LocalitiesLoader(ctx, m_boundariesTable, VillageFilter(ctx, m_villagesCache), m_villages, m_loadedIds));
    });

    m_villages.SetCovered(p);
  }
}

void LocalityFinder::UpdateMaps()
{
  if (m_mapsLoaded)
    return;

  vector<shared_ptr<MwmInfo>> mwmsInfo;
  m_dataSource.GetMwmsInfo(mwmsInfo);
  for (auto const & info : mwmsInfo)
  {
    MwmSet::MwmId id(info);

    switch (info->GetType())
    {
    case MwmInfo::WORLD: m_worldId = id; break;
    /// @todo Use fair MWM rect from CountryInfoGetter here and everywhere in search?
    /// @see MwmInfo.m_bordersRect for details.
    case MwmInfo::COUNTRY: m_maps.Add(id, info->m_bordersRect); break;
    case MwmInfo::COASTS: break;
    }
  }
  m_mapsLoaded = true;
}
}  // namespace search
160
libs/search/locality_finder.hpp
Normal file
@@ -0,0 +1,160 @@
#pragma once

#include "search/cities_boundaries_table.hpp"

#include "indexer/feature_utils.hpp"
#include "indexer/mwm_set.hpp"
#include "indexer/rank_table.hpp"

#include "platform/preferred_languages.hpp"

#include "coding/string_utf8_multilang.hpp"

#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
#include "geometry/tree4d.hpp"

#include "base/macros.hpp"

#include <cstdint>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>

class DataSource;

namespace search
{
class VillagesCache;

struct LocalityItem
{
  using Boundaries = CitiesBoundariesTable::Boundaries;

  LocalityItem(StringUtf8Multilang const & names, m2::PointD const & center, Boundaries && boundaries,
               uint64_t population, FeatureID const & id);

  bool GetName(int8_t lang, std::string_view & name) const { return m_names.GetString(lang, name); }

  bool GetSpecifiedOrDefaultName(int8_t lang, std::string_view & name) const
  {
    return GetName(lang, name) || GetName(StringUtf8Multilang::kDefaultCode, name);
  }

  bool GetReadableName(std::string_view & name) const
  {
    auto const mwmInfo = m_id.m_mwmId.GetInfo();
    if (!mwmInfo)
      return false;

    feature::NameParamsOut out;
    feature::GetReadableName(
        {m_names, mwmInfo->GetRegionData(), languages::GetCurrentMapLanguage(), false /* allowTranslit */}, out);

    name = out.primary;
    return !name.empty();
  }

  StringUtf8Multilang m_names;
  m2::PointD m_center;
  Boundaries m_boundaries;
  uint64_t m_population;
  FeatureID m_id;
};

std::string DebugPrint(LocalityItem const & item);

class LocalitySelector
{
public:
  LocalitySelector(m2::PointD const & p);

  void operator()(LocalityItem const & item);

  template <typename Fn>
  bool WithBestLocality(Fn && fn) const
  {
    if (!m_locality)
      return false;
    fn(*m_locality);
    return true;
  }

private:
  m2::PointD const m_p;

  bool m_inside = false;
  double m_score = std::numeric_limits<double>::max();
  LocalityItem const * m_locality = nullptr;
};

class LocalityFinder
{
public:
  class Holder
  {
  public:
    Holder(double radiusMeters);

    bool IsCovered(m2::RectD const & rect) const;
    void SetCovered(m2::PointD const & p);

    void Add(LocalityItem const & item);
    void ForEachInVicinity(m2::RectD const & rect, LocalitySelector & selector) const;

    m2::RectD GetRect(m2::PointD const & p) const;
    m2::RectD GetDRect(m2::PointD const & p) const;

    void Clear();

  private:
    double const m_radiusMeters;
    m4::Tree<bool> m_coverage;
    m4::Tree<LocalityItem> m_localities;

    DISALLOW_COPY_AND_MOVE(Holder);
  };

  LocalityFinder(DataSource const & dataSource, CitiesBoundariesTable const & boundaries,
                 VillagesCache & villagesCache);

  template <typename Fn>
  bool GetLocality(m2::PointD const & p, Fn && fn)
  {
    m2::RectD const crect = m_cities.GetRect(p);
    m2::RectD const vrect = m_villages.GetRect(p);

    LoadVicinity(p, !m_cities.IsCovered(crect) /* loadCities */, !m_villages.IsCovered(vrect) /* loadVillages */);

    LocalitySelector selector(p);
    m_cities.ForEachInVicinity(crect, selector);
    m_villages.ForEachInVicinity(vrect, selector);

    return selector.WithBestLocality(std::forward<Fn>(fn));
  }

  void ClearCache();

private:
  void LoadVicinity(m2::PointD const & p, bool loadCities, bool loadVillages);
  void UpdateMaps();

  DataSource const & m_dataSource;
  CitiesBoundariesTable const & m_boundariesTable;
  VillagesCache & m_villagesCache;

  Holder m_cities;
  Holder m_villages;

  m4::Tree<MwmSet::MwmId> m_maps;
  MwmSet::MwmId m_worldId;
  bool m_mapsLoaded;

  std::unique_ptr<RankTable> m_ranks;

  std::map<MwmSet::MwmId, std::unordered_set<uint32_t>> m_loadedIds;
};
}  // namespace search
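To tie the pieces together, an illustrative sketch of a typical call (not part of the commit; dataSource, boundariesTable and villagesCache are assumed to be set up by the surrounding framework, and mercator::FromLatLon converts geographic coordinates to the map's mercator space):

#include "search/locality_finder.hpp"

#include "geometry/mercator.hpp"

#include <string>
#include <string_view>

std::string NearestLocalityName(DataSource const & dataSource, search::CitiesBoundariesTable const & boundariesTable,
                                search::VillagesCache & villagesCache, double lat, double lon)
{
  search::LocalityFinder finder(dataSource, boundariesTable, villagesCache);

  std::string result;
  finder.GetLocality(mercator::FromLatLon(lat, lon), [&result](search::LocalityItem const & item)
  {
    std::string_view name;
    if (item.GetReadableName(name))
      result = std::string(name);
  });
  return result;
}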
Some files were not shown because too many files have changed in this diff