Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 13:58:55 +01:00
parent 4af19165ec
commit 68073add76
12458 changed files with 12350765 additions and 2 deletions

188
libs/search/CMakeLists.txt Normal file
View file

@ -0,0 +1,188 @@
# Declares the `search` project and collects its headers and sources in SRC.
project(search)
set(SRC
algos.hpp
approximate_string_match.cpp
approximate_string_match.hpp
base/inverted_list.hpp
base/mem_search_index.hpp
base/text_index/dictionary.hpp
base/text_index/header.cpp
base/text_index/header.hpp
base/text_index/mem.cpp
base/text_index/mem.hpp
base/text_index/merger.cpp
base/text_index/merger.hpp
base/text_index/postings.hpp
base/text_index/reader.hpp
base/text_index/text_index.cpp
base/text_index/text_index.hpp
base/text_index/utils.hpp
bookmarks/data.cpp
bookmarks/data.hpp
bookmarks/processor.cpp
bookmarks/processor.hpp
bookmarks/results.hpp
bookmarks/types.cpp
bookmarks/types.hpp
cancel_exception.hpp
categories_cache.cpp
categories_cache.hpp
categories_set.hpp
cbv.cpp
cbv.hpp
cities_boundaries_table.cpp
cities_boundaries_table.hpp
city_finder.cpp
city_finder.hpp
common.hpp
cuisine_filter.cpp
cuisine_filter.hpp
displayed_categories.cpp
displayed_categories.hpp
doc_vec.cpp
doc_vec.hpp
downloader_search_callback.cpp
downloader_search_callback.hpp
dummy_rank_table.cpp
dummy_rank_table.hpp
editor_delegate.cpp
editor_delegate.hpp
emitter.hpp
engine.cpp
engine.hpp
feature_loader.cpp
feature_loader.hpp
feature_offset_match.hpp
features_filter.cpp
features_filter.hpp
features_layer.cpp
features_layer.hpp
features_layer_matcher.cpp
features_layer_matcher.hpp
# The file below contains template function implementations, so it is not compiled
# on its own (hence commented out); it should be included where the templates are used.
# features_layer_path_finder.cpp
features_layer_path_finder.hpp
filtering_params.hpp
geocoder.cpp
geocoder.hpp
geocoder_context.cpp
geocoder_context.hpp
geocoder_locality.cpp
geocoder_locality.hpp
geometry_cache.cpp
geometry_cache.hpp
geometry_utils.cpp
geometry_utils.hpp
highlighting.cpp
highlighting.hpp
house_detector.cpp
house_detector.hpp
house_numbers_matcher.cpp
house_numbers_matcher.hpp
house_to_street_table.cpp
house_to_street_table.hpp
idf_map.cpp
idf_map.hpp
intermediate_result.cpp
intermediate_result.hpp
intersection_result.cpp
intersection_result.hpp
interval_set.hpp
keyword_lang_matcher.cpp
keyword_lang_matcher.hpp
keyword_matcher.cpp
keyword_matcher.hpp
latlon_match.cpp
latlon_match.hpp
lazy_centers_table.cpp
lazy_centers_table.hpp
localities_source.cpp
localities_source.hpp
locality_finder.cpp
locality_finder.hpp
locality_scorer.cpp
locality_scorer.hpp
mode.cpp
mode.hpp
model.cpp
model.hpp
mwm_context.cpp
mwm_context.hpp
nested_rects_cache.cpp
nested_rects_cache.hpp
point_rect_matcher.hpp
postcode_points.cpp
postcode_points.hpp
pre_ranker.cpp
pre_ranker.hpp
pre_ranking_info.cpp
pre_ranking_info.hpp
processor.cpp
processor.hpp
projection_on_street.cpp
projection_on_street.hpp
query_params.cpp
query_params.hpp
query_saver.cpp
query_saver.hpp
ranker.cpp
ranker.hpp
ranking_info.cpp
ranking_info.hpp
ranking_utils.cpp
ranking_utils.hpp
region_address_getter.cpp
region_address_getter.hpp
region_info_getter.cpp
region_info_getter.hpp
result.cpp
result.hpp
retrieval.cpp
retrieval.hpp
reverse_geocoder.cpp
reverse_geocoder.hpp
search_index_values.hpp
search_params.cpp
search_params.hpp
search_trie.hpp
segment_tree.cpp
segment_tree.hpp
stats_cache.hpp
street_vicinity_loader.cpp
street_vicinity_loader.hpp
streets_matcher.cpp
streets_matcher.hpp
string_utils.cpp
string_utils.hpp
suggest.cpp
suggest.hpp
token_range.hpp
token_slice.cpp
token_slice.hpp
tracer.cpp
tracer.hpp
types_skipper.cpp
types_skipper.hpp
utils.cpp
utils.hpp
utm_mgrs_coords_match.cpp
utm_mgrs_coords_match.hpp
)
# Create the target named after the project from the SRC list.
# NOTE(review): omim_add_library is a project-provided helper defined outside this file.
omim_add_library(${PROJECT_NAME} ${SRC})
# Libraries the search target links against.
target_link_libraries(${PROJECT_NAME}
editor
storage
ge0
openlocationcode
)
# The test-support and search-quality subdirectories are added only when
# PLATFORM_DESKTOP is set.
if(PLATFORM_DESKTOP)
add_subdirectory(search_tests_support)
add_subdirectory(search_quality)
endif()
# Test subdirectories are registered through the project's omim_add_test_subdirectory helper.
omim_add_test_subdirectory(search_tests)
omim_add_test_subdirectory(search_integration_tests)

80
libs/search/algos.hpp Normal file
View file

@ -0,0 +1,80 @@
#pragma once
#include "base/base.hpp"
#include <algorithm>
#include <cstddef>
#include <vector>
namespace search
{
namespace impl
{
// Per-element bookkeeping used by LongestSubsequence. For the decreasing and the
// increasing chain that end at an element it stores the chain length and the index
// of the previous element of that chain.
struct LS
{
  size_t prevDecreasePos, decreaseValue;
  size_t prevIncreasePos, increaseValue;

  // Both chains start with length 1 and point back at the element itself.
  LS(size_t i) : prevDecreasePos(i), decreaseValue(1), prevIncreasePos(i), increaseValue(1) {}
};
}  // namespace impl

// Finds the longest monotonic (strictly increasing or strictly decreasing, as decided
// by |cmp.Less| and |cmp.Greater|) subsequence of |in| and writes its elements to |out|
// starting from the last element of the subsequence and walking back to the first.
// Nothing is written for an empty input.
template <typename T, typename OutIter, typename Comp>
void LongestSubsequence(std::vector<T> const & in, OutIter out, Comp cmp)
{
  size_t const count = in.size();
  if (count == 0)
    return;

  std::vector<impl::LS> chains;
  chains.reserve(count);
  for (size_t idx = 0; idx < count; ++idx)
    chains.push_back(impl::LS(idx));

  // Length and end position of the best chain found so far.
  size_t bestLen = 1;
  size_t bestEnd = 0;

  for (size_t from = 0; from < count; ++from)
  {
    for (size_t to = from + 1; to < count; ++to)
    {
      impl::LS const & src = chains[from];
      impl::LS & dst = chains[to];

      // Try to extend the increasing chain ending at |from| with the element at |to|.
      if (cmp.Less(in[from], in[to]) && src.increaseValue + 1 >= dst.increaseValue)
      {
        dst.increaseValue = src.increaseValue + 1;
        dst.prevIncreasePos = from;
      }

      // Same for the decreasing chain.
      if (cmp.Greater(in[from], in[to]) && src.decreaseValue + 1 >= dst.decreaseValue)
      {
        dst.decreaseValue = src.decreaseValue + 1;
        dst.prevDecreasePos = from;
      }

      size_t const longest = std::max(dst.increaseValue, dst.decreaseValue);
      if (longest > bestLen)
      {
        bestLen = longest;
        bestEnd = to;
      }
    }
  }

  // Pick the chain kind that is longer at the best end position (increasing on ties).
  bool const increasing = chains[bestEnd].increaseValue >= chains[bestEnd].decreaseValue;

  for (size_t emitted = 0; emitted < bestLen; ++emitted)
  {
    *out++ = in[bestEnd];
    bestEnd = increasing ? chains[bestEnd].prevIncreasePos : chains[bestEnd].prevDecreasePos;
  }
}
}  // namespace search

View file

@ -0,0 +1,46 @@
#include "search/approximate_string_match.hpp"
// TODO: Build an error model.
// Take neighboring keys on the keyboard into account.
// 1. A neighboring key typed instead of the intended one.
// 2. A neighboring key typed before or after the intended one.
namespace search
{
using strings::UniChar;
// Default, character-independent edit costs used with StringMatchCost.
// In CostXY, X and Y are the numbers of characters StringMatchCost consumes from the
// first and from the second string respectively when it applies that edit.
// All arguments are ignored: every cost is a constant.

// Cost of consuming one character of the first string and none of the second.
uint32_t DefaultMatchCost::Cost10(UniChar) const
{
return 256;
}
// Cost of consuming one character of the second string and none of the first.
uint32_t DefaultMatchCost::Cost01(UniChar) const
{
return 256;
}
// Cost of consuming one (mismatching) character from each string.
uint32_t DefaultMatchCost::Cost11(UniChar, UniChar) const
{
return 256;
}
// Cost of consuming one character of the first string and two of the second.
uint32_t DefaultMatchCost::Cost12(UniChar, UniChar const *) const
{
return 512;
}
// Cost of consuming two characters of the first string and one of the second.
uint32_t DefaultMatchCost::Cost21(UniChar const *, UniChar) const
{
return 512;
}
// Cost of consuming two characters from each string.
uint32_t DefaultMatchCost::Cost22(UniChar const *, UniChar const *) const
{
return 512;
}
// Cost of two adjacent characters appearing in swapped order in the other string.
uint32_t DefaultMatchCost::SwapCost(UniChar, UniChar) const
{
return 256;
}
} // namespace search

View file

@ -0,0 +1,86 @@
#pragma once
#include "indexer/search_string_utils.hpp"
#include "base/base.hpp"
#include "base/buffer_vector.hpp"
#include <cstdint>
#include <queue>
namespace search
{
namespace impl
{
// A state of the StringMatchCost search: positions in both strings and the cost
// accumulated to reach them.
struct MatchCostData
{
  uint32_t m_A = 0;
  uint32_t m_B = 0;
  uint32_t m_Cost = 0;

  MatchCostData() = default;
  MatchCostData(uint32_t a, uint32_t b, uint32_t cost) : m_A(a), m_B(b), m_Cost(cost) {}

  // The comparison is inverted so that std::priority_queue, which keeps the greatest
  // element on top, yields the state with the smallest cost first.
  bool operator<(MatchCostData const & rhs) const { return rhs.m_Cost < m_Cost; }
};

// Pushes the state (a, b, cost) to |q| unless |cost| exceeds |maxCost|.
template <typename PriorityQueue>
void PushMatchCost(PriorityQueue & q, uint32_t maxCost, uint32_t a, uint32_t b, uint32_t cost)
{
  if (cost > maxCost)
    return;
  q.push(MatchCostData(a, b, cost));
}
}  // namespace impl
// Default cost function object for StringMatchCost.
// In CostXY, X is the number of characters consumed from string A and Y the number
// consumed from string B by the corresponding edit in StringMatchCost.
class DefaultMatchCost
{
public:
// One character of A consumed, none of B.
uint32_t Cost10(strings::UniChar a) const;
// One character of B consumed, none of A.
uint32_t Cost01(strings::UniChar b) const;
// One character of A replaced by one character of B.
uint32_t Cost11(strings::UniChar a, strings::UniChar b) const;
// One character of A against two characters of B (|pB| points to the first of them).
uint32_t Cost12(strings::UniChar a, strings::UniChar const * pB) const;
// Two characters of A (|pA| points to the first of them) against one character of B.
uint32_t Cost21(strings::UniChar const * pA, strings::UniChar b) const;
// Two characters of A against two characters of B.
uint32_t Cost22(strings::UniChar const * pA, strings::UniChar const * pB) const;
// Two adjacent characters |a1|, |a2| of A that appear in swapped order in B.
uint32_t SwapCost(strings::UniChar a1, strings::UniChar a2) const;
};
template <typename Char, typename CostFn>
uint32_t StringMatchCost(Char const * sA, size_t sizeA, Char const * sB, size_t sizeB, CostFn const & costF,
uint32_t maxCost, bool bPrefixMatch = false)
{
std::priority_queue<impl::MatchCostData, buffer_vector<impl::MatchCostData, 256>> q;
q.push(impl::MatchCostData(0, 0, 0));
while (!q.empty())
{
uint32_t a = q.top().m_A;
uint32_t b = q.top().m_B;
uint32_t const c = q.top().m_Cost;
q.pop();
while (a < sizeA && b < sizeB && sA[a] == sB[b])
{
++a;
++b;
}
if (a == sizeA && (bPrefixMatch || b == sizeB))
return c;
if (a < sizeA)
impl::PushMatchCost(q, maxCost, a + 1, b, c + costF.Cost10(sA[a]));
if (b < sizeB)
impl::PushMatchCost(q, maxCost, a, b + 1, c + costF.Cost01(sB[b]));
if (a < sizeA && b < sizeB)
impl::PushMatchCost(q, maxCost, a + 1, b + 1, c + costF.Cost11(sA[a], sB[b]));
if (a + 1 < sizeA && b < sizeB)
impl::PushMatchCost(q, maxCost, a + 2, b + 1, c + costF.Cost21(&sA[a], sB[b]));
if (a < sizeA && b + 1 < sizeB)
impl::PushMatchCost(q, maxCost, a + 1, b + 2, c + costF.Cost12(sA[a], &sB[b]));
if (a + 1 < sizeA && b + 1 < sizeB)
{
impl::PushMatchCost(q, maxCost, a + 2, b + 2, c + costF.Cost22(&sA[a], &sB[b]));
if (sA[a] == sB[b + 1] && sA[a + 1] == sB[b])
impl::PushMatchCost(q, maxCost, a + 2, b + 2, c + costF.SwapCost(sA[a], sA[a + 1]));
}
}
return maxCost + 1;
}
} // namespace search

View file

@ -0,0 +1,56 @@
#pragma once
#include "base/assert.hpp"
#include <algorithm>
#include <cstddef>
#include <vector>
namespace search_base
{
// A list of document ids meant to be used as the value type of an inverted index.
// Ids are kept sorted and without duplicates.
template <typename Id>
class InvertedList
{
public:
  using value_type = Id;
  using Value = Id;

  // Inserts |id| at its sorted position.
  // Returns false (and changes nothing) if |id| is already in the list.
  bool Add(Id const & id)
  {
    auto const pos = std::lower_bound(m_ids.begin(), m_ids.end(), id);
    bool const present = pos != m_ids.end() && *pos == id;
    if (!present)
      m_ids.insert(pos, id);
    return !present;
  }

  // Removes |id|. Returns false if |id| was not in the list.
  bool Erase(Id const & id)
  {
    auto const pos = std::lower_bound(m_ids.begin(), m_ids.end(), id);
    if (pos != m_ids.end() && !(*pos != id))
    {
      m_ids.erase(pos);
      return true;
    }
    return false;
  }

  // Calls |toDo| for every id in increasing order.
  template <typename ToDo>
  void ForEach(ToDo && toDo) const
  {
    for (auto it = m_ids.cbegin(), last = m_ids.cend(); it != last; ++it)
      toDo(*it);
  }

  size_t Size() const { return m_ids.size(); }

  bool Empty() const { return m_ids.empty(); }

  void Clear() { m_ids.clear(); }

  void Swap(InvertedList & rhs) { rhs.m_ids.swap(m_ids); }

private:
  std::vector<Id> m_ids;
};
} // namespace search_base

View file

@ -0,0 +1,97 @@
#pragma once
#include "search/base/inverted_list.hpp"
#include "indexer/trie.hpp"
#include "base/assert.hpp"
#include "base/mem_trie.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
#include <algorithm>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
namespace search_base
{
// An in-memory search index: a trie that maps language-prefixed tokens to the
// sorted lists of ids of the documents containing them.
template <typename Id>
class MemSearchIndex
{
public:
  using Token = strings::UniString;
  using Char = Token::value_type;
  using List = InvertedList<Id>;
  using Trie = base::MemTrie<Token, List>;
  using Iterator = trie::MemTrieIterator<Token, List>;

  // Registers |id| under every token of |doc|.
  template <typename Doc>
  void Add(Id const & id, Doc const & doc)
  {
    auto const insert = [this, &id](Token const & token) { m_trie.Add(token, id); };
    ForEachToken(id, doc, insert);
  }

  // Removes |id| from every token of |doc|.
  template <typename Doc>
  void Erase(Id const & id, Doc const & doc)
  {
    auto const remove = [this, &id](Token const & token) { m_trie.Erase(token, id); };
    ForEachToken(id, doc, remove);
  }

  Iterator GetRootIterator() const { return Iterator(m_trie.GetRootIterator()); }

  // Returns all ids stored in the index, sorted and without duplicates.
  std::vector<Id> GetAllIds() const
  {
    auto const collect = [this](std::vector<Id> & out)
    {
      m_trie.ForEachInTrie([&out](Token const & /* token */, Id const & id) { out.push_back(id); });
    };
    return WithIds(collect);
  }

  // Returns the number of documents that contain |token| in language |lang|.
  // When |prefix| is true, counts the distinct documents having any token that
  // starts with |token|.
  size_t GetNumDocs(int8_t lang, strings::UniString const & token, bool prefix) const
  {
    auto const key = AddLang(lang, token);

    if (prefix)
    {
      auto const matched = WithIds([&](std::vector<Id> & out)
      {
        m_trie.ForEachInSubtree(key, [&out](Token const & /* token */, Id const & id) { out.push_back(id); });
      });
      return matched.size();
    }

    size_t numDocs = 0;
    m_trie.WithValuesHolder(key, [&numDocs](List const & list) { numDocs = list.Size(); });
    return numDocs;
  }

private:
  // Builds the trie key: |lang| as the first character followed by |token|.
  static Token AddLang(int8_t lang, Token const & token)
  {
    Token key(token.size() + 1);
    key[0] = static_cast<Char>(lang);
    std::copy(token.begin(), token.end(), key.begin() + 1);
    return key;
  }

  // Calls |fn| with the trie key of every token of |doc| whose language is non-negative.
  template <typename Doc, typename Fn>
  void ForEachToken(Id const & /*id*/, Doc const & doc, Fn && fn)
  {
    doc.ForEachToken([&fn](int8_t lang, Token const & token)
    {
      if (lang < 0)
        return;
      fn(AddLang(lang, token));
    });
  }

  // Lets |fn| fill a vector of ids, then returns it sorted and deduplicated.
  template <typename Fn>
  static std::vector<Id> WithIds(Fn && fn)
  {
    std::vector<Id> collected;
    fn(collected);
    base::SortUnique(collected);
    return collected;
  }

  Trie m_trie;
};
} // namespace search_base

View file

@ -0,0 +1,116 @@
#pragma once
#include "search/base/text_index/header.hpp"
#include "search/base/text_index/text_index.hpp"
#include "coding/write_to_sink.hpp"
#include "base/assert.hpp"
#include "base/checked_cast.hpp"
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>
namespace search_base
{
// The dictionary contains all tokens that are present
// in the text index. The tokens are stored sorted, and the id of a token
// is its position in the sorted list.
class TextIndexDictionary
{
public:
// Looks |token| up with a binary search. On success stores its position
// in |id| and returns true. Returns false and leaves |id| untouched otherwise.
bool GetTokenId(Token const & token, size_t & id) const
{
auto const it = std::lower_bound(m_tokens.cbegin(), m_tokens.cend(), token);
if (it == m_tokens.cend() || *it != token)
return false;
id = base::checked_cast<uint32_t>(std::distance(m_tokens.cbegin(), it));
return true;
}
// Replaces the contents of the dictionary. |tokens| must already be sorted.
void SetTokens(std::vector<Token> && tokens)
{
ASSERT(std::is_sorted(tokens.begin(), tokens.end()), ());
m_tokens = std::move(tokens);
}
std::vector<Token> const & GetTokens() const { return m_tokens; }
// Writes the dictionary to |sink| and fills the token count and the dictionary
// offsets in |header|. |startPos| is the position the offsets are measured from.
// Layout: (numTokens + 1) 32-bit token offsets followed by the tokens themselves.
template <typename Sink>
void Serialize(Sink & sink, TextIndexHeader & header, uint64_t startPos) const
{
header.m_numTokens = base::checked_cast<uint32_t>(m_tokens.size());
header.m_dictPositionsOffset = RelativePos(sink, startPos);
// An uint32_t for each 32-bit offset and an uint32_t for the dummy entry at the end.
// The zeroes are placeholders: the real offsets are known only after the tokens are written.
WriteZeroesToSink(sink, sizeof(uint32_t) * (header.m_numTokens + 1));
header.m_dictWordsOffset = RelativePos(sink, startPos);
std::vector<uint32_t> offsets;
offsets.reserve(header.m_numTokens + 1);
for (auto const & token : m_tokens)
{
offsets.emplace_back(RelativePos(sink, startPos));
SerializeToken(sink, token);
}
// The dummy entry: the position right after the last token.
offsets.emplace_back(RelativePos(sink, startPos));
{
// Go back, overwrite the placeholders with the real offsets and restore the position.
uint64_t const savedPos = sink.Pos();
sink.Seek(startPos + header.m_dictPositionsOffset);
for (uint32_t const o : offsets)
WriteToSink(sink, o);
CHECK_EQUAL(sink.Pos(), startPos + header.m_dictWordsOffset, ());
sink.Seek(savedPos);
}
}
// Reads the dictionary from |source|, which must be positioned at the token offsets
// array. The token count and the offsets are taken from |header|.
template <typename Source>
void Deserialize(Source & source, TextIndexHeader const & header)
{
auto const startPos = source.Pos();
std::vector<uint32_t> tokenOffsets(header.m_numTokens + 1);
for (uint32_t & offset : tokenOffsets)
offset = ReadPrimitiveFromSource<uint32_t>(source);
// The offsets array must end exactly where the header says the tokens begin.
uint64_t const expectedSize = header.m_dictWordsOffset - header.m_dictPositionsOffset;
CHECK_EQUAL(source.Pos(), startPos + expectedSize, ());
m_tokens.resize(header.m_numTokens);
for (size_t i = 0; i < m_tokens.size(); ++i)
{
// Tokens are written without separators, so the size of a token is the
// distance between its offset and the next one.
size_t const size = base::checked_cast<size_t>(tokenOffsets[i + 1] - tokenOffsets[i]);
DeserializeToken(source, m_tokens[i], size);
}
}
private:
// Writes the raw characters of |token|. Empty tokens are not allowed.
template <typename Sink>
static void SerializeToken(Sink & sink, Token const & token)
{
CHECK(!token.empty(), ());
// todo(@m) Endianness.
sink.Write(token.data(), token.size() * sizeof(typename Token::value_type));
}
// Reads |size| bytes from |source| into |token|. |size| must be positive.
template <typename Source>
static void DeserializeToken(Source & source, Token & token, size_t size)
{
CHECK_GREATER(size, 0, ());
ASSERT_EQUAL(size % sizeof(typename Token::value_type), 0, ());
token.resize(size / sizeof(typename Token::value_type));
source.Read(&token[0], size);
}
// Returns the current position of |sink| relative to |startPos| as a 32-bit value.
template <typename Sink>
static uint32_t RelativePos(Sink & sink, uint64_t startPos)
{
return base::checked_cast<uint32_t>(sink.Pos() - startPos);
}
std::vector<Token> m_tokens;
};
} // namespace search_base

View file

@ -0,0 +1,9 @@
#include "search/base/text_index/header.hpp"
using namespace std;
namespace search_base
{
// static
// The byte sequence written at the very beginning of a serialized text index header
// and verified when the header is read back.
string const TextIndexHeader::kHeaderMagic = "mapsmetextidx";
} // namespace search_base

View file

@ -0,0 +1,57 @@
#pragma once
#include "search/base/text_index/text_index.hpp"
#include "coding/reader.hpp"
#include "coding/write_to_sink.hpp"
#include "base/assert.hpp"
#include <cstdint>
#include <string>
namespace search_base
{
// The header of a serialized text index: the magic string, the format version,
// the number of tokens and the offsets of the index sections.
struct TextIndexHeader
{
// Writes the header to |sink|: magic, version as one byte, then the five 32-bit fields
// in the order of their declaration. Only version V0 can be written.
template <typename Sink>
void Serialize(Sink & sink) const
{
CHECK_EQUAL(m_version, TextIndexVersion::V0, ());
sink.Write(kHeaderMagic.data(), kHeaderMagic.size());
WriteToSink(sink, static_cast<uint8_t>(m_version));
WriteToSink(sink, m_numTokens);
WriteToSink(sink, m_dictPositionsOffset);
WriteToSink(sink, m_dictWordsOffset);
WriteToSink(sink, m_postingsStartsOffset);
WriteToSink(sink, m_postingsListsOffset);
}
// Reads the header from |source| in the same order Serialize writes it.
// Both the magic string and the version are verified with CHECKs.
template <typename Source>
void Deserialize(Source & source)
{
// Checks the version this object held before reading.
CHECK_EQUAL(m_version, TextIndexVersion::V0, ());
std::string headerMagic(kHeaderMagic.size(), ' ');
source.Read(&headerMagic[0], headerMagic.size());
CHECK_EQUAL(headerMagic, kHeaderMagic, ());
m_version = static_cast<TextIndexVersion>(ReadPrimitiveFromSource<uint8_t>(source));
// Checks the version that has just been read.
CHECK_EQUAL(m_version, TextIndexVersion::V0, ());
m_numTokens = ReadPrimitiveFromSource<uint32_t>(source);
m_dictPositionsOffset = ReadPrimitiveFromSource<uint32_t>(source);
m_dictWordsOffset = ReadPrimitiveFromSource<uint32_t>(source);
m_postingsStartsOffset = ReadPrimitiveFromSource<uint32_t>(source);
m_postingsListsOffset = ReadPrimitiveFromSource<uint32_t>(source);
}
// Magic string that starts every serialized header.
static std::string const kHeaderMagic;
TextIndexVersion m_version = TextIndexVersion::Latest;
// Number of tokens in the dictionary.
uint32_t m_numTokens = 0;
// Offset of the array of token offsets.
uint32_t m_dictPositionsOffset = 0;
// Offset of the tokens themselves.
uint32_t m_dictWordsOffset = 0;
// Offset of the array of postings lists offsets.
uint32_t m_postingsStartsOffset = 0;
// Offset of the postings lists themselves.
uint32_t m_postingsListsOffset = 0;
};
} // namespace search_base

View file

@ -0,0 +1,34 @@
#include "search/base/text_index/mem.hpp"
#include "base/stl_helpers.hpp"
using namespace std;
namespace search_base
{
// Appends |posting| to the postings of |token|, creating the entry for
// |token| if it does not exist yet.
void MemTextIndex::AddPosting(Token const & token, Posting const & posting)
{
  auto & postings = m_postingsByToken[token];
  postings.push_back(posting);
}
void MemTextIndex::SortPostings()
{
for (auto & entry : m_postingsByToken)
{
// A posting may occur several times in a document,
// so we remove duplicates for the docid index.
// If the count is needed for ranking it may be stored
// separately.
base::SortUnique(entry.second);
}
}
void MemTextIndex::BuildDictionary()
{
vector<Token> tokens;
tokens.reserve(m_postingsByToken.size());
for (auto const & entry : m_postingsByToken)
tokens.emplace_back(entry.first);
m_dictionary.SetTokens(std::move(tokens));
}
} // namespace search_base

View file

@ -0,0 +1,167 @@
#pragma once
#include "search/base/text_index/dictionary.hpp"
#include "search/base/text_index/header.hpp"
#include "search/base/text_index/postings.hpp"
#include "search/base/text_index/text_index.hpp"
#include "search/base/text_index/utils.hpp"
#include "coding/reader.hpp"
#include "coding/varint.hpp"
#include "base/assert.hpp"
#include "base/string_utils.hpp"
#include <algorithm>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace search_base
{
// An in-memory text index. Postings are accumulated per token with AddPosting;
// the whole index can be written to a sink and restored from a source.
class MemTextIndex
{
public:
  MemTextIndex() = default;

  // Records |posting| for |token|.
  void AddPosting(Token const & token, Posting const & posting);

  // Executes |fn| on every posting associated with |token|.
  // The order of postings is not specified.
  template <typename Fn>
  void ForEachPosting(Token const & token, Fn && fn) const
  {
    auto const it = m_postingsByToken.find(token);
    if (it == m_postingsByToken.end())
      return;
    for (auto const p : it->second)
      fn(p);
  }

  // Same as above for a token given as a UniString: it is converted to UTF-8 first.
  template <typename Fn>
  void ForEachPosting(strings::UniString const & token, Fn && fn) const
  {
    ForEachPosting(strings::ToUtf8(token), std::forward<Fn>(fn));
  }

  // Writes the index to |sink| starting at its current position.
  // Postings are sorted and deduplicated and the dictionary is rebuilt first,
  // which is why this method is not const.
  template <typename Sink>
  void Serialize(Sink & sink)
  {
    SortPostings();
    BuildDictionary();

    TextIndexHeader header;

    uint64_t const startPos = sink.Pos();
    // Will be filled in later: this first write only reserves the space.
    header.Serialize(sink);

    SerializeDictionary(sink, header, startPos);
    SerializePostingsLists(sink, header, startPos);

    // All offsets are known now: rewrite the header and restore the position.
    uint64_t const finishPos = sink.Pos();
    sink.Seek(startPos);
    header.Serialize(sink);
    sink.Seek(finishPos);
  }

  // Reads the index from |source| starting at its current position,
  // replacing the dictionary and the postings held by this object.
  template <typename Source>
  void Deserialize(Source & source)
  {
    uint64_t const startPos = source.Pos();

    TextIndexHeader header;
    header.Deserialize(source);

    DeserializeDictionary(source, header, startPos);
    DeserializePostingsLists(source, header, startPos);
  }

private:
  // Feeds the postings lists of |postingsByToken| to WritePostings, one token at a
  // time, in the iteration order of the map.
  class MemPostingsFetcher : public PostingsFetcher
  {
  public:
    explicit MemPostingsFetcher(std::map<Token, std::vector<Posting>> const & postingsByToken)
      : m_postingsByToken(postingsByToken)
      , m_it(m_postingsByToken.begin())
    {}

    // PostingsFetcher overrides:
    bool IsValid() const override { return m_it != m_postingsByToken.end(); }

    void Advance() override
    {
      if (m_it != m_postingsByToken.end())
        ++m_it;
    }

    void ForEachPosting(Fn const & fn) const override
    {
      CHECK(IsValid(), ());
      for (uint32_t p : m_it->second)
        fn(p);
    }

  private:
    std::map<Token, std::vector<Posting>> const & m_postingsByToken;
    // Iterator to the current token that will be processed when ForEachPosting is called.
    std::map<Token, std::vector<Posting>>::const_iterator m_it;
  };

  // Sorts the postings of every token and removes duplicates.
  void SortPostings();

  // Rebuilds |m_dictionary| from the keys of |m_postingsByToken|.
  void BuildDictionary();

  template <typename Sink>
  void SerializeDictionary(Sink & sink, TextIndexHeader & header, uint64_t startPos) const
  {
    m_dictionary.Serialize(sink, header, startPos);
  }

  // |source| must be positioned at the dictionary section announced by |header|.
  template <typename Source>
  void DeserializeDictionary(Source & source, TextIndexHeader const & header, uint64_t startPos)
  {
    CHECK_EQUAL(source.Pos(), startPos + header.m_dictPositionsOffset, ());
    m_dictionary.Deserialize(source, header);
  }

  template <typename Sink>
  void SerializePostingsLists(Sink & sink, TextIndexHeader & header, uint64_t startPos) const
  {
    MemPostingsFetcher fetcher(m_postingsByToken);
    WritePostings(sink, startPos, header, fetcher);
  }

  // Reads the array of postings lists offsets and then the delta-encoded lists themselves.
  // Must be called after the dictionary has been deserialized: tokens are taken from it.
  template <typename Source>
  void DeserializePostingsLists(Source & source, TextIndexHeader const & header, uint64_t startPos)
  {
    CHECK_EQUAL(source.Pos(), startPos + header.m_postingsStartsOffset, ());
    std::vector<uint32_t> postingsStarts(header.m_numTokens + 1);
    for (uint32_t & start : postingsStarts)
      start = ReadPrimitiveFromSource<uint32_t>(source);

    auto const & tokens = m_dictionary.GetTokens();
    CHECK_EQUAL(source.Pos(), startPos + header.m_postingsListsOffset, ());
    m_postingsByToken.clear();
    for (size_t i = 0; i < header.m_numTokens; ++i)
    {
      std::vector<uint32_t> postings;
      uint32_t last = 0;
      // The stored offsets are relative to |startPos|, while source.Pos() is absolute,
      // so both the loop bound and the final check need the absolute end position.
      uint64_t const listEnd = startPos + postingsStarts[i + 1];
      while (source.Pos() < listEnd)
      {
        // Postings are stored as deltas from the previous posting.
        last += ReadVarUint<uint32_t>(source);
        postings.emplace_back(last);
      }
      CHECK_EQUAL(source.Pos(), listEnd, ());
      m_postingsByToken.emplace(tokens[i], std::move(postings));
    }
  }

  std::map<Token, std::vector<Posting>> m_postingsByToken;
  TextIndexDictionary m_dictionary;
};
} // namespace search_base

View file

@ -0,0 +1,126 @@
#include "search/base/text_index/merger.hpp"
#include "search/base/text_index/dictionary.hpp"
#include "search/base/text_index/header.hpp"
#include "search/base/text_index/postings.hpp"
#include "coding/file_writer.hpp"
#include "coding/varint.hpp"
#include "coding/write_to_sink.hpp"
#include "base/assert.hpp"
#include "base/logging.hpp"
#include "base/stl_helpers.hpp"
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>
using namespace std;
namespace
{
using namespace search_base;
class MergedPostingsListFetcher : public PostingsFetcher
{
public:
MergedPostingsListFetcher(TextIndexDictionary const & dict, TextIndexReader const & index1,
TextIndexReader const & index2)
: m_dict(dict)
, m_index1(index1)
, m_index2(index2)
{
ReadPostings();
}
// PostingsFetcher overrides:
bool IsValid() const override
{
auto const & tokens = m_dict.GetTokens();
CHECK_LESS_OR_EQUAL(m_tokenId, tokens.size(), ());
return m_tokenId < tokens.size();
}
void Advance() override
{
auto const & tokens = m_dict.GetTokens();
CHECK_LESS_OR_EQUAL(m_tokenId, tokens.size(), ());
if (m_tokenId == tokens.size())
return;
++m_tokenId;
ReadPostings();
}
void ForEachPosting(Fn const & fn) const override
{
CHECK(IsValid(), ());
for (uint32_t p : m_postings)
fn(p);
}
private:
// Reads postings for the current token.
void ReadPostings()
{
m_postings.clear();
if (!IsValid())
return;
auto const & tokens = m_dict.GetTokens();
m_index1.ForEachPosting(tokens[m_tokenId], base::MakeBackInsertFunctor(m_postings));
m_index2.ForEachPosting(tokens[m_tokenId], base::MakeBackInsertFunctor(m_postings));
base::SortUnique(m_postings);
}
TextIndexDictionary const & m_dict;
TextIndexReader const & m_index1;
TextIndexReader const & m_index2;
// Index of the next token from |m_dict| to be processed.
size_t m_tokenId = 0;
vector<uint32_t> m_postings;
};
// Returns a dictionary holding every token that is present in |dict1| or |dict2|,
// sorted and without duplicates.
TextIndexDictionary MergeDictionaries(TextIndexDictionary const & dict1, TextIndexDictionary const & dict2)
{
  auto const & ts1 = dict1.GetTokens();
  auto const & ts2 = dict2.GetTokens();

  std::vector<Token> commonTokens;
  std::merge(ts1.begin(), ts1.end(), ts2.begin(), ts2.end(), std::back_inserter(commonTokens));
  ASSERT(is_sorted(commonTokens.begin(), commonTokens.end()), ());

  // A token present in both dictionaries appears twice after the merge.
  auto const uniqueEnd = std::unique(commonTokens.begin(), commonTokens.end());
  commonTokens.erase(uniqueEnd, commonTokens.end());

  TextIndexDictionary dict;
  dict.SetTokens(std::move(commonTokens));
  return dict;
}
} // namespace
namespace search_base
{
// static
// Writes to |sink| a text index that contains the tokens and postings of both
// |index1| and |index2|. The index starts at the current position of |sink|.
void TextIndexMerger::Merge(TextIndexReader const & index1, TextIndexReader const & index2, FileWriter & sink)
{
TextIndexDictionary const dict = MergeDictionaries(index1.GetDictionary(), index2.GetDictionary());
TextIndexHeader header;
uint64_t const startPos = sink.Pos();
// Will be filled in later.
// This first write only reserves the space for the header.
header.Serialize(sink);
// Fills the token count and the dictionary offsets in |header|.
dict.Serialize(sink, header, startPos);
MergedPostingsListFetcher fetcher(dict, index1, index2);
// Fills the postings offsets in |header|.
WritePostings(sink, startPos, header, fetcher);
// Fill in the header.
uint64_t const finishPos = sink.Pos();
sink.Seek(startPos);
header.Serialize(sink);
sink.Seek(finishPos);
}
} // namespace search_base

View file

@ -0,0 +1,26 @@
#pragma once
#include "search/base/text_index/reader.hpp"
class FileWriter;
namespace search_base
{
// Merges two on-disk text indexes and writes the result as a new index.
class TextIndexMerger
{
public:
// The merging process is as follows.
// 1. Dictionaries from both indexes are read into memory, merged
// and written to disk.
// 2. One uint32_t per entry is reserved in memory to calculate the
// offsets of the postings lists.
// 3. One token at a time, all postings for the token are read from
// both indexes into memory, unified and written to disk.
// 4. The offsets are written to disk.
//
// Note that the dictionary and offsets are kept in memory during the whole
// merging process.
//
// The merged index is written to |sink| starting at its current position.
static void Merge(TextIndexReader const & index1, TextIndexReader const & index2, FileWriter & sink);
};
} // namespace search_base

View file

@ -0,0 +1,88 @@
#pragma once
#include "search/base/text_index/header.hpp"
#include "search/base/text_index/text_index.hpp"
#include "search/base/text_index/utils.hpp"
#include "coding/varint.hpp"
#include "coding/write_to_sink.hpp"
#include <cstdint>
#include <functional>
#include <vector>
namespace search_base
{
struct TextIndexHeader;
// A helper class that fetches the postings lists for
// one token at a time. It is assumed that the tokens
// are enumerated in the lexicographic order.
// Typical usage (see WritePostings): while IsValid(), call ForEachPosting
// and then Advance.
class PostingsFetcher
{
public:
// The callback type that receives one posting at a time.
using Fn = std::function<void(uint32_t)>;
virtual ~PostingsFetcher() = default;
// Returns true when there are tokens left in the fetcher and false otherwise.
virtual bool IsValid() const = 0;
// Advances fetcher to the next token.
virtual void Advance() = 0;
// Calls |fn| for every posting for the current token. Initially,
// current token is the first token and then calls to Advance
// may be used to process the next token until the underlying
// source of the tokens is exhausted and the fetcher is no longer valid.
// WritePostings requires the postings of a token to be passed in increasing order.
virtual void ForEachPosting(Fn const & fn) const = 0;
};
// Fetches the postings list one by one from |fetcher| and writes them
// to |sink|, updating the fields in |header| that correspond to the
// postings list.
// |startPos| marks the start of the entire text index and is needed to compute
// the offsets that are stored in |header|.
// |header.m_numTokens| must already be set: it determines the size of the offsets array.
// Every postings list is written as varint-encoded deltas between consecutive postings.
template <typename Sink>
void WritePostings(Sink & sink, uint64_t startPos, TextIndexHeader & header, PostingsFetcher & fetcher)
{
  header.m_postingsStartsOffset = RelativePos(sink, startPos);
  // An uint32_t for each 32-bit offset and an uint32_t for the dummy entry at the end.
  // The zeroes are placeholders that are overwritten once the real offsets are known.
  WriteZeroesToSink(sink, sizeof(uint32_t) * (header.m_numTokens + 1));

  header.m_postingsListsOffset = RelativePos(sink, startPos);

  std::vector<uint32_t> postingsStarts;
  // One entry per token plus the trailing dummy entry.
  postingsStarts.reserve(static_cast<size_t>(header.m_numTokens) + 1);
  {
    // The previous posting of the current token; reset for every token.
    uint32_t last = 0;
    // todo(@m) s/uint32_t/Posting/ ?
    auto writePostings = [&](uint32_t p)
    {
      // Postings of a token must come in strictly increasing order.
      CHECK(last == 0 || last < p, (last, p));
      uint32_t const delta = p - last;
      WriteVarUint(sink, delta);
      last = p;
    };
    while (fetcher.IsValid())
    {
      postingsStarts.emplace_back(RelativePos(sink, startPos));
      last = 0;
      fetcher.ForEachPosting(writePostings);
      fetcher.Advance();
    }
  }
  // One more for convenience: the end of the last postings list.
  postingsStarts.emplace_back(RelativePos(sink, startPos));

  {
    // Go back, write the real offsets over the placeholders and restore the position.
    uint64_t const savedPos = sink.Pos();
    sink.Seek(startPos + header.m_postingsStartsOffset);
    for (uint32_t const s : postingsStarts)
      WriteToSink(sink, s);
    CHECK_EQUAL(sink.Pos(), startPos + header.m_postingsListsOffset, ());
    sink.Seek(savedPos);
  }
}
} // namespace search_base

View file

@ -0,0 +1,78 @@
#pragma once
#include "search/base/text_index/dictionary.hpp"
#include "search/base/text_index/text_index.hpp"
#include "coding/file_reader.hpp"
#include "coding/reader.hpp"
#include "coding/varint.hpp"
#include "base/assert.hpp"
#include "base/string_utils.hpp"
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
namespace search_base
{
// A reader class for on-demand reading of postings lists from disk.
// The dictionary and the array of postings lists offsets are loaded into memory
// by the constructor; the postings themselves are read on every ForEachPosting call.
class TextIndexReader
{
public:
  // Reads the header, the dictionary and the postings lists offsets from |fileReader|.
  // The offsets stored in the header are used as positions inside |fileReader|.
  explicit TextIndexReader(FileReader const & fileReader) : m_fileReader(fileReader)
  {
    ReaderSource<FileReader> headerSource(m_fileReader);
    TextIndexHeader header;
    header.Deserialize(headerSource);

    // The dictionary section spans up to the start of the postings offsets array.
    uint64_t const dictStart = header.m_dictPositionsOffset;
    uint64_t const dictEnd = header.m_postingsStartsOffset;
    ReaderSource<FileReader> dictSource(m_fileReader.SubReader(dictStart, dictEnd - dictStart));
    m_dictionary.Deserialize(dictSource, header);

    uint64_t const postStart = header.m_postingsStartsOffset;
    uint64_t const postEnd = header.m_postingsListsOffset;
    ReaderSource<FileReader> postingsSource(m_fileReader.SubReader(postStart, postEnd - postStart));
    // One offset per token plus the trailing entry that marks the end of the last list.
    m_postingsStarts.resize(header.m_numTokens + 1);
    for (uint32_t & start : m_postingsStarts)
      start = ReadPrimitiveFromSource<uint32_t>(postingsSource);
  }

  // Executes |fn| on every posting associated with |token|.
  // The order of postings is not specified.
  // Does nothing if |token| is not in the dictionary.
  template <typename Fn>
  void ForEachPosting(Token const & token, Fn && fn) const
  {
    size_t tokenId = 0;
    if (!m_dictionary.GetTokenId(token, tokenId))
      return;
    CHECK_LESS(tokenId + 1, m_postingsStarts.size(), ());

    ReaderSource<FileReader> source(
        m_fileReader.SubReader(m_postingsStarts[tokenId], m_postingsStarts[tokenId + 1] - m_postingsStarts[tokenId]));

    // Postings are stored as deltas from the previous posting.
    uint32_t last = 0;
    while (source.Size() > 0)
    {
      last += ReadVarUint<uint32_t>(source);
      fn(last);
    }
  }

  // Same as above for a token given as a UniString: it is converted to UTF-8 first.
  template <typename Fn>
  void ForEachPosting(strings::UniString const & token, Fn && fn) const
  {
    ForEachPosting(strings::ToUtf8(token), std::forward<Fn>(fn));
  }

  TextIndexDictionary const & GetDictionary() const { return m_dictionary; }

private:
  FileReader m_fileReader;
  TextIndexDictionary m_dictionary;
  std::vector<uint32_t> m_postingsStarts;
};
} // namespace search_base

View file

@ -0,0 +1,20 @@
#include "search/base/text_index/text_index.hpp"
#include "base/assert.hpp"
#include "base/string_utils.hpp"
using namespace std;
namespace search_base
{
// Returns a printable name of |version|.
// For a value that is not a known enumerator an ASSERT is triggered and a
// message containing the numeric value is returned.
string DebugPrint(TextIndexVersion const & version)
{
  switch (version)
  {
  case TextIndexVersion::V0: return "V0";
  }

  auto const rawVersion = static_cast<uint8_t>(version);
  string ret = "Unknown TextIndexHeader version: ";
  ret += strings::to_string(rawVersion);
  ASSERT(false, (ret));
  return ret;
}
} // namespace search_base

View file

@ -0,0 +1,42 @@
#pragma once
#include <cstdint>
#include <string>
// This file contains the structures needed to store an
// updatable text index on disk.
//
// The index maps tokens of string type (typically std::string or
// strings::UniString) to postings lists, i.e. to lists of entities
// called postings that encode the locations of the strings in the collection
// of the text documents that is being indexed. An example of a posting
// is a document id (docid). Another example is a pair of a document id and
// a position within the corresponding document.
//
// The updates are performed by rebuilding the index, either as a result
// of merging several indexes together, or as a result of clearing outdated
// entries from an old index.
//
// For version 0, the postings lists are docid arrays, i.e. arrays of unsigned
// 32-bit integers stored in increasing order.
// The structure of the index is:
// [header: version and offsets]
// [array containing the starting positions of tokens]
// [tokens, written without separators in the lexicographical order]
// [array containing the offsets for the postings lists]
// [postings lists, stored as delta-encoded varints]
//
// All offsets are measured relative to the start of the index.
namespace search_base
{
using Token = std::string;
using Posting = uint32_t;
// Version tag of the text index, stored in a single byte.
enum class TextIndexVersion : uint8_t
{
// Postings lists are arrays of docids (see the format description above).
V0 = 0,
// Alias for the most recent version; currently the same as V0.
Latest = V0
};
std::string DebugPrint(TextIndexVersion const & version);
} // namespace search_base

View file

@ -0,0 +1,14 @@
#pragma once
#include "base/checked_cast.hpp"
#include <cstdint>
namespace search_base
{
// Returns how far the current position of |sink| is from |startPos|,
// as a 32-bit value. The narrowing goes through base::checked_cast.
template <typename Sink>
uint32_t RelativePos(Sink & sink, uint64_t startPos)
{
  auto const offset = sink.Pos() - startPos;
  return base::checked_cast<uint32_t>(offset);
}
} // namespace search_base

View file

@ -0,0 +1,20 @@
#include "search/bookmarks/data.hpp"
#include <sstream>
using namespace std;
namespace search
{
namespace bookmarks
{
// Formats |data| as "Data [names: <names>, description: <description>]".
string DebugPrint(Data const & data)
{
  ostringstream out;
  out << "Data [" << "names: " << ::DebugPrint(data.GetNames()) << ", " << "description: " << data.GetDescription()
      << "]";
  return out.str();
}
} // namespace bookmarks
} // namespace search

View file

@ -0,0 +1,90 @@
#pragma once
#include "indexer/search_string_utils.hpp"
#include "kml/types.hpp"
#include "coding/string_utf8_multilang.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
#include <string>
#include <vector>
namespace search
{
namespace bookmarks
{
// TODO (@m, @y): add more features for a bookmark here, i.e. address, center.
// Searchable representation of a single bookmark: the set of names it is
// indexed by and its description.
// TODO (@m, @y): add more features for a bookmark here, i.e. address, center.
class Data
{
public:
  Data() = default;

  // Builds the indexable data from |bookmarkData|. |locale| is used to pick
  // the preferred names; the description is taken via kml::GetDefaultStr.
  Data(kml::BookmarkData const & bookmarkData, std::string const & locale)
    : m_names(ExtractIndexableNames(bookmarkData, locale))
    , m_description(kml::GetDefaultStr(bookmarkData.m_description))
  {}

  // Calls |fn|(lang, token) for every normalized token of every name.
  template <typename Fn>
  void ForEachNameToken(Fn && fn) const
  {
    auto withDefaultLang = [&](strings::UniString const & token)
    {
      // Note that the Default Language here is not the same as in the kml library.
      // Bookmark search by locale is not supported so every name is stored
      // in the default branch of the search trie.
      fn(StringUtf8Multilang::kDefaultCode, token);
    };

    for (auto const & name : m_names)
      ForEachNormalizedToken(name, withDefaultLang);
  }

  // Calls |fn|(lang, token) for every normalized token of the description.
  // As with names, the language passed is always the default code.
  template <typename Fn>
  void ForEachDescriptionToken(Fn && fn) const
  {
    auto withDefaultLang = [&](strings::UniString const & token) { fn(StringUtf8Multilang::kDefaultCode, token); };
    ForEachNormalizedToken(m_description, withDefaultLang);
  }

  std::vector<std::string> const & GetNames() const { return m_names; }
  std::string const & GetDescription() const { return m_description; }

private:
  // Collects the names the bookmark should be found by: trimmed, sorted,
  // deduplicated and with empty strings removed.
  // Static because it is called from the constructor's initializer list,
  // i.e. before any member of the object has been initialized, and it does
  // not need access to the object.
  static std::vector<std::string> ExtractIndexableNames(kml::BookmarkData const & bookmarkData,
                                                        std::string const & locale)
  {
    std::vector<std::string> names;

    // Same as GetPreferredBookmarkName from the map library. Duplicated here to avoid dependency.
    names.emplace_back(kml::GetPreferredBookmarkName(bookmarkData, locale));
    names.emplace_back(kml::GetPreferredBookmarkStr(bookmarkData.m_name, locale));
    // todo(@m) Platform's API does not allow to use |locale| here.
    names.emplace_back(kml::GetLocalizedFeatureType(bookmarkData.m_featureTypes));

    // Normalization is postponed. It is unlikely but we may still need original strings later.
    // Trimming seems harmless, though.
    for (auto & s : names)
      strings::Trim(s);

    base::SortUnique(names);
    base::EraseIf(names, [](std::string const & s) { return s.empty(); });
    return names;
  }

  // Names and custom names in all the locales that we are interested in.
  // The locale set is fixed at startup and the relevant names are provided
  // by the kml library. In case the user switches the device locale while
  // running the app, the UI will adapt; however the search will not, and the
  // bookmarks will not be reindexed. We consider this situation to be improbable
  // enough to justify not storing redundant names here.
  std::vector<std::string> m_names;
  std::string m_description;
};
std::string DebugPrint(Data const & data);
} // namespace bookmarks
} // namespace search

View file

@ -0,0 +1,281 @@
#include "search/bookmarks/processor.hpp"
#include "search/emitter.hpp"
#include "base/assert.hpp"
#include "base/checked_cast.hpp"
#include "base/dfa_helpers.hpp"
#include "base/levenshtein_dfa.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
#include <algorithm>
namespace search
{
namespace bookmarks
{
namespace
{
// Adapter that exposes the tokens of a DocVec through a ForEachToken
// method; instances are passed to m_index.Add() and m_index.Erase().
// Holds a reference only, so the wrapped DocVec must outlive the wrapper.
struct DocVecWrapper
{
explicit DocVecWrapper(DocVec const & dv) : m_dv(dv) {}
// Calls |fn|(lang, token) for every token of the wrapped DocVec.
// The language is always StringUtf8Multilang::kDefaultCode.
template <typename Fn>
void ForEachToken(Fn && fn) const
{
for (size_t i = 0; i < m_dv.GetNumTokens(); ++i)
fn(StringUtf8Multilang::kDefaultCode, m_dv.GetToken(i));
}
DocVec const & m_dv;
};
// Ranking features of a single retrieved bookmark.
// The ordering is deliberately reversed with respect to the similarity:
// an info with a HIGHER cosine similarity compares as "less", so that an
// ascending sort puts the most relevant results first.
struct RankingInfo
{
  double m_cosineSimilarity = 0.0;

  bool operator<(RankingInfo const & rhs) const { return rhs.m_cosineSimilarity < m_cosineSimilarity; }
  bool operator>(RankingInfo const & rhs) const { return m_cosineSimilarity < rhs.m_cosineSimilarity; }
  // Two infos are equal when neither is ordered before the other.
  bool operator==(RankingInfo const & rhs) const { return !(*this < rhs || *this > rhs); }
  bool operator!=(RankingInfo const & rhs) const { return *this < rhs || *this > rhs; }
};
// A bookmark id together with its ranking info.
// Ordered by ranking info first; ids break ties.
struct IdInfoPair
{
  IdInfoPair(Id const & id, RankingInfo const & info) : m_id(id), m_info(info) {}

  bool operator<(IdInfoPair const & rhs) const
  {
    if (m_info == rhs.m_info)
      return m_id < rhs.m_id;
    return m_info < rhs.m_info;
  }

  Id m_id;
  RankingInfo m_info;
};
// Stores into |info| the similarity between the query vector |qv| and the
// document vector |dv|, as computed by QueryVec::Similarity with |idfs|.
void FillRankingInfo(QueryVec & qv, IdfMap & idfs, DocVec const & dv, RankingInfo & info)
{
info.m_cosineSimilarity = qv.Similarity(idfs, dv);
}
} // namespace
// Stores references to |emitter| and |cancellable|; both must outlive
// the processor.
Processor::Processor(Emitter & emitter, base::Cancellable const & cancellable)
: m_emitter(emitter)
, m_cancellable(cancellable)
{}
// Drops the index, all documents and all group bookkeeping, and turns
// indexing of descriptions off again.
void Processor::Reset()
{
m_index = {};
m_docs.clear();
m_indexDescriptions = false;
m_indexableGroups.clear();
m_idToGroup.clear();
m_bookmarksInGroup.clear();
}
// Sets the flag that Add() consults to decide whether description tokens
// go into a document vector. Only the flag is changed here; documents
// that were already added are not rebuilt by this call.
void Processor::EnableIndexingOfDescriptions(bool enable)
{
m_indexDescriptions = enable;
}
// Marks the group |groupId| as indexable or not. When the state actually
// changes, every bookmark currently attached to the group is added to
// (or erased from) the search index accordingly.
void Processor::EnableIndexingOfBookmarkGroup(GroupId const & groupId, bool enable)
{
  bool const wasIndexable = m_indexableGroups.count(groupId) > 0;
  if (enable)
    m_indexableGroups.insert(groupId);
  else
    m_indexableGroups.erase(groupId);
  bool const nowIndexable = m_indexableGroups.count(groupId) > 0;

  if (wasIndexable == nowIndexable)
    return;

  // Look the group up instead of using operator[]: the latter would insert
  // an empty set for a group that has no bookmarks, while DetachFromGroup
  // removes groups from |m_bookmarksInGroup| as soon as they become empty.
  auto const groupIt = m_bookmarksInGroup.find(groupId);
  if (groupIt == m_bookmarksInGroup.end())
    return;

  for (auto const & id : groupIt->second)
  {
    if (nowIndexable)
      AddToIndex(id);
    else
      EraseFromIndex(id);
  }
}
// Registers the bookmark |id| with the contents of |doc|. The document
// vector is built from the name tokens and, if indexing of descriptions
// is enabled, from the description tokens as well. The bookmark is only
// stored here; it is put into the search index by AddToIndex().
void Processor::Add(Id const & id, Doc const & doc)
{
  ASSERT_EQUAL(m_docs.count(id), 0, ());

  DocVec::Builder builder;
  // Names and descriptions are tokenized the same way; the language is ignored.
  auto addToken = [&builder](int8_t /* lang */, strings::UniString const & token) { builder.Add(token); };

  doc.ForEachNameToken(addToken);
  if (m_indexDescriptions)
    doc.ForEachDescriptionToken(addToken);

  // Assign from a temporary so that the vector can be moved into the map
  // instead of being copied from a named const object.
  m_docs[id] = DocVec(builder);
}
// Puts the tokens of the already added bookmark |id| into the search index.
// The bookmark must have been registered with Add() (checked by the assert).
void Processor::AddToIndex(Id const & id)
{
ASSERT_EQUAL(m_docs.count(id), 1, ());
m_index.Add(id, DocVecWrapper(m_docs[id]));
}
// Replaces the document of bookmark |id| with |doc|.
// The bookmark is detached from its group (if any), erased, added again
// with the new contents and finally re-attached to the same group, which
// also re-indexes it when that group is indexable.
void Processor::Update(Id const & id, Doc const & doc)
{
auto group = kInvalidGroupId;
auto const groupIt = m_idToGroup.find(id);
if (groupIt != m_idToGroup.end())
{
// A copy to avoid use-after-free.
// (DetachFromGroup erases the map entry that |groupIt| points to.)
group = groupIt->second;
DetachFromGroup(id, group);
}
Erase(id);
Add(id, doc);
if (group != kInvalidGroupId)
AttachToGroup(id, group);
}
// Forgets the document of bookmark |id|. The bookmark must exist and must
// not be attached to any group (both are checked by asserts only).
// The search index itself is not touched here.
void Processor::Erase(Id const & id)
{
ASSERT_EQUAL(m_docs.count(id), 1, ());
ASSERT(m_idToGroup.find(id) == m_idToGroup.end(),
("A bookmark must be detached from all groups before being deleted."));
m_docs.erase(id);
}
// Removes the tokens of bookmark |id| from the search index. The document
// itself stays in |m_docs|, so the bookmark can be indexed again later.
void Processor::EraseFromIndex(Id const & id)
{
ASSERT_EQUAL(m_docs.count(id), 1, ());
auto const & docVec = m_docs[id];
m_index.Erase(id, DocVecWrapper(docVec));
}
// Attaches bookmark |id| to |group| and indexes it if the group is indexable.
// NOTE(review): when the bookmark already belongs to a group, only a warning
// is logged; the mapping is then overwritten, but nothing here removes |id|
// from the previous group's set in |m_bookmarksInGroup| - confirm whether
// callers always detach first.
void Processor::AttachToGroup(Id const & id, GroupId const & group)
{
auto const it = m_idToGroup.find(id);
if (it != m_idToGroup.end())
LOG(LWARNING, ("Tried to attach bookmark", id, "to group", group, "but it already belongs to group", it->second));
m_idToGroup[id] = group;
m_bookmarksInGroup[group].insert(id);
if (m_indexableGroups.count(group) > 0)
AddToIndex(id);
}
// Detaches bookmark |id| from |group|. If the bookmark is not attached to
// exactly this group, a warning is logged and nothing is changed.
// Otherwise the bookmark is removed from the group bookkeeping, erased from
// the index when the group is indexable, and the group entry is dropped
// once it has no bookmarks left.
void Processor::DetachFromGroup(Id const & id, GroupId const & group)
{
auto const it = m_idToGroup.find(id);
if (it == m_idToGroup.end())
{
LOG(LWARNING, ("Tried to detach bookmark", id, "from group", group, "but it does not belong to any group"));
return;
}
if (it->second != group)
{
LOG(LWARNING, ("Tried to detach bookmark", id, "from group", group, "but it only belongs to group", it->second));
return;
}
m_idToGroup.erase(it);
// operator[] creates the entry if it is missing, so the lookup below always succeeds.
m_bookmarksInGroup[group].erase(id);
if (m_indexableGroups.count(group) > 0)
EraseFromIndex(id);
auto const groupIt = m_bookmarksInGroup.find(group);
CHECK(groupIt != m_bookmarksInGroup.end(), (group, m_bookmarksInGroup));
// Do not keep empty groups around.
if (groupIt->second.size() == 0)
m_bookmarksInGroup.erase(groupIt);
}
// Runs a bookmark search for |params| and reports the results to the emitter.
//
// The search has three stages:
//   1. retrieval: ids of all indexed bookmarks matching at least one query
//      token (the last token may be matched as a prefix);
//   2. ranking: each retrieved bookmark, optionally restricted to
//      |params.m_groupId|, gets its similarity to the query;
//   3. emission: at most |params.m_maxNumResults| best results are emitted.
//
// Cancellation is polled between the stages and inside the loops via
// BailIfCancelled().
void Processor::Search(Params const & params) const
{
  std::set<Id> ids;
  auto insertId = [&ids](Id const & id, bool /* exactMatch */) { ids.insert(id); };

  for (size_t i = 0; i < params.GetNumTokens(); ++i)
  {
    BailIfCancelled();

    auto const & token = params.GetToken(i);
    if (params.IsPrefixToken(i))
      Retrieve<strings::PrefixDFAModifier<strings::LevenshteinDFA>>(token, insertId);
    else
      Retrieve<strings::LevenshteinDFA>(token, insertId);
  }

  IdfMap idfs(*this, 1.0 /* unknownIdf */);
  auto qv = GetQueryVec(idfs, params);

  std::vector<IdInfoPair> idInfos;
  for (auto const & id : ids)
  {
    BailIfCancelled();

    // Skip bookmarks outside of the requested group, if one was requested.
    if (params.m_groupId != kInvalidGroupId)
    {
      auto const it = m_idToGroup.find(id);
      if (it == m_idToGroup.end() || it->second != params.m_groupId)
        continue;
    }

    auto it = m_docs.find(id);
    CHECK(it != m_docs.end(), ("Can't find retrieved doc:", id));
    auto const & doc = it->second;

    RankingInfo info;
    FillRankingInfo(qv, idfs, doc, info);
    idInfos.emplace_back(id, info);
  }

  BailIfCancelled();
  // Qualified on purpose: this file has no using-directive for std, so an
  // unqualified call would only compile through argument-dependent lookup.
  // IdInfoPair's operator< puts the most similar documents first.
  std::sort(idInfos.begin(), idInfos.end());

  size_t numEmitted = 0;
  for (auto const & idInfo : idInfos)
  {
    if (numEmitted >= params.m_maxNumResults)
      break;
    m_emitter.AddBookmarkResult(bookmarks::Result(idInfo.m_id));
    ++numEmitted;
  }
}
// Forwards the end-of-search notification, with the |cancelled| flag,
// to the emitter.
void Processor::Finish(bool cancelled)
{
m_emitter.Finish(cancelled);
}
// IdfMap::Delegate implementation: asks the index how many documents
// contain |token| (or have it as a prefix when |isPrefix| is set),
// always in the default language branch.
uint64_t Processor::GetNumDocs(strings::UniString const & token, bool isPrefix) const
{
return base::asserted_cast<uint64_t>(m_index.GetNumDocs(StringUtf8Multilang::kDefaultCode, token, isPrefix));
}
// Builds the query vector for |params|: the original form of every token is
// added as a full token, except for a prefix token, which is set as the prefix.
QueryVec Processor::GetQueryVec(IdfMap & idfs, QueryParams const & params) const
{
QueryVec::Builder builder;
for (size_t i = 0; i < params.GetNumTokens(); ++i)
{
auto const & token = params.GetToken(i).GetOriginal();
if (params.IsPrefixToken(i))
builder.SetPrefix(token);
else
builder.AddFull(token);
}
return {idfs, builder};
}
} // namespace bookmarks
} // namespace search

View file

@ -0,0 +1,105 @@
#pragma once
#include "search/base/mem_search_index.hpp"
#include "search/bookmarks/types.hpp"
#include "search/cancel_exception.hpp"
#include "search/doc_vec.hpp"
#include "search/feature_offset_match.hpp"
#include "search/idf_map.hpp"
#include "search/query_params.hpp"
#include "search/search_params.hpp"
#include "search/utils.hpp"
#include <unordered_map>
#include <unordered_set>
namespace base
{
class Cancellable;
}
namespace search
{
class Emitter;
namespace bookmarks
{
// In-memory search over bookmarks.
// Bookmarks are registered with Add(), grouped with AttachToGroup() and
// become searchable once their group is made indexable. Search() reports
// results through the Emitter passed to the constructor.
class Processor : public IdfMap::Delegate
{
public:
using Index = search_base::MemSearchIndex<Id>;
struct Params : public QueryParams
{
// If valid, only show results with bookmarks attached to |m_groupId|.
GroupId m_groupId = kInvalidGroupId;
// Upper bound on the number of results emitted by a single search.
size_t m_maxNumResults = SearchParams::kDefaultNumResultsEverywhere;
};
// Both |emitter| and |cancellable| are stored by reference.
Processor(Emitter & emitter, base::Cancellable const & cancellable);
~Processor() override = default;
// Removes all bookmarks, groups and index contents.
void Reset();
// By default, only bookmark names are indexed. This method
// should be used to enable or disable indexing bookmarks
// by their descriptions.
void EnableIndexingOfDescriptions(bool enable);
// Makes the bookmarks of |groupId| searchable (or not searchable).
void EnableIndexingOfBookmarkGroup(GroupId const & groupId, bool enable);
// Adds a bookmark to Processor but does not index it.
void Add(Id const & id, Doc const & doc);
// Indexes an already added bookmark.
void AddToIndex(Id const & id);
// Updates a bookmark with a new |doc|. Re-indexes if the bookmark
// is already attached to an indexable group.
void Update(Id const & id, Doc const & doc);
// Removes a bookmark that is not attached to any group.
void Erase(Id const & id);
// Removes an added bookmark from the index but keeps its document.
void EraseFromIndex(Id const & id);
void AttachToGroup(Id const & id, GroupId const & group);
void DetachFromGroup(Id const & id, GroupId const & group);
// Retrieves, ranks and emits the bookmarks matching |params|.
void Search(Params const & params) const;
// Tells the emitter that the search is over.
void Finish(bool cancelled);
// IdfMap::Delegate overrides:
uint64_t GetNumDocs(strings::UniString const & token, bool isPrefix) const override;
private:
// Throws if the current search was cancelled.
void BailIfCancelled() const { ::search::BailIfCancelled(m_cancellable); }
// Matches |token| against the index using the automaton type |DFA| and
// calls |fn| for every match. No id is filtered out and only the default
// language is searched.
template <typename DFA, typename Fn>
void Retrieve(QueryParams::Token const & token, Fn && fn) const
{
SearchTrieRequest<DFA> request;
FillRequestFromToken(token, request);
request.m_langs.insert(StringUtf8Multilang::kDefaultCode);
MatchFeaturesInTrie(request, m_index.GetRootIterator(), [](Id const & /* id */) { return true; } /* filter */,
std::forward<Fn>(fn));
}
QueryVec GetQueryVec(IdfMap & idfs, QueryParams const & params) const;
Emitter & m_emitter;
base::Cancellable const & m_cancellable;
Index m_index;
// Document vectors of all added bookmarks, indexed or not.
std::unordered_map<Id, DocVec> m_docs;
bool m_indexDescriptions = false;
// Groups whose bookmarks are kept in |m_index|.
std::unordered_set<GroupId> m_indexableGroups;
// Currently a bookmark can belong to at most one group
// but in the future it is possible for a single bookmark to be
// attached to multiple groups.
std::unordered_map<Id, GroupId> m_idToGroup;
std::unordered_map<GroupId, std::unordered_set<Id>> m_bookmarksInGroup;
};
} // namespace bookmarks
} // namespace search

View file

@ -0,0 +1,20 @@
#pragma once
#include "search/bookmarks/types.hpp"
#include <vector>
namespace search
{
namespace bookmarks
{
// A single bookmark search result: just the id of the matched bookmark.
struct Result
{
explicit Result(Id id) : m_id(id) {}
Id m_id = {};
};
using Results = std::vector<Result>;
} // namespace bookmarks
} // namespace search

View file

@ -0,0 +1,9 @@
#include "search/bookmarks/types.hpp"
namespace search
{
namespace bookmarks
{
// Sentinel meaning "no group": the largest value representable by GroupId.
GroupId constexpr kInvalidGroupId = std::numeric_limits<GroupId>::max();
} // namespace bookmarks
} // namespace search

View file

@ -0,0 +1,19 @@
#pragma once
#include "search/bookmarks/data.hpp"
#include <cstdint>
#include <limits>
namespace search
{
namespace bookmarks
{
// todo(@m) s/Id/DocId/g ?
// Identifier of a bookmark.
using Id = uint64_t;
// Identifier of a bookmark group.
using GroupId = uint64_t;
// The document type that is indexed for a bookmark.
using Doc = Data;
// Sentinel group id meaning "no group"; defined in types.cpp.
extern GroupId const kInvalidGroupId;
} // namespace bookmarks
} // namespace search

View file

@ -0,0 +1,17 @@
#pragma once
#include "base/cancellable.hpp"
#include "base/exception.hpp"
namespace search
{
// This exception can be thrown from the deep darkness of search and
// geometry retrieval for fast cancellation of time-consuming tasks.
DECLARE_EXCEPTION(CancelException, RootException);
// Throws CancelException if |cancellable| reports that it was cancelled;
// does nothing otherwise.
inline void BailIfCancelled(base::Cancellable const & cancellable)
{
if (cancellable.IsCancelled())
MYTHROW(CancelException, ("Cancelled"));
}
} // namespace search

View file

@ -0,0 +1,84 @@
#include "search/categories_cache.hpp"
#include "search/mwm_context.hpp"
#include "search/retrieval.hpp"
#include "indexer/classificator.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "indexer/search_string_utils.hpp"
namespace search
{
using namespace std;
// CategoriesCache ---------------------------------------------------------------------------------
// Returns the features of the mwm behind |context| that belong to the cached
// categories. The result is computed on the first request for an mwm and
// served from |m_cache| afterwards.
CBV CategoriesCache::Get(MwmContext const & context)
{
  auto const mwmId = context.m_handle.GetId();
  auto const cached = m_cache.find(mwmId);
  if (cached == m_cache.cend())
  {
    auto loaded = Load(context);
    m_cache[mwmId] = loaded;
    return loaded;
  }
  return cached->second;
}
// Retrieves from the mwm behind |context| the features that match any of
// the types in |m_categories| or any type in their classificator subtrees.
CBV CategoriesCache::Load(MwmContext const & context) const
{
auto const & c = classif();
// Any DFA will do, since we only use request's m_categories,
// but the interface of Retrieval forces us to make a choice.
SearchTrieRequest<strings::UniStringDFA> request;
// m_categories usually has truncated types; add them together with their subtrees.
m_categories.ForEach([&request, &c](uint32_t const type)
{
c.ForEachInSubtree([&](uint32_t descendantType)
{ request.m_categories.emplace_back(FeatureTypeToString(c.GetIndexForType(descendantType))); }, type);
});
Retrieval retrieval(context, m_cancellable);
return retrieval.RetrieveAddressFeatures(request).m_features;
}
// StreetsCache ------------------------------------------------------------------------------------
// Seeds the cache with the types enumerated by ftypes::IsStreetOrSquareChecker.
StreetsCache::StreetsCache(base::Cancellable const & cancellable)
: CategoriesCache(ftypes::IsStreetOrSquareChecker::Instance(), cancellable)
{}
// SuburbsCache ------------------------------------------------------------------------------------
// Seeds the cache with the types enumerated by ftypes::IsSuburbChecker.
SuburbsCache::SuburbsCache(base::Cancellable const & cancellable)
: CategoriesCache(ftypes::IsSuburbChecker::Instance(), cancellable)
{}
// VillagesCache -----------------------------------------------------------------------------------
// Seeds the cache with the types enumerated by ftypes::IsVillageChecker.
VillagesCache::VillagesCache(base::Cancellable const & cancellable)
: CategoriesCache(ftypes::IsVillageChecker::Instance(), cancellable)
{}
// CountriesCache ----------------------------------------------------------------------------------
// Seeds the cache with the types enumerated by ftypes::IsCountryChecker.
CountriesCache::CountriesCache(base::Cancellable const & cancellable)
: CategoriesCache(ftypes::IsCountryChecker::Instance(), cancellable)
{}
// StatesCache -------------------------------------------------------------------------------------
// Seeds the cache with the types enumerated by ftypes::IsStateChecker.
StatesCache::StatesCache(base::Cancellable const & cancellable)
: CategoriesCache(ftypes::IsStateChecker::Instance(), cancellable)
{}
// CitiesTownsOrVillagesCache ----------------------------------------------------------------------
// Seeds the cache with the types enumerated by ftypes::IsCityTownOrVillageChecker.
CitiesTownsOrVillagesCache::CitiesTownsOrVillagesCache(base::Cancellable const & cancellable)
: CategoriesCache(ftypes::IsCityTownOrVillageChecker::Instance(), cancellable)
{}
// HotelsCache -------------------------------------------------------------------------------------
// Seeds the cache with the types enumerated by ftypes::IsHotelChecker.
HotelsCache::HotelsCache(base::Cancellable const & cancellable)
: CategoriesCache(ftypes::IsHotelChecker::Instance(), cancellable)
{}
// FoodCache ---------------------------------------------------------------------------------------
// Seeds the cache with the types enumerated by ftypes::IsEatChecker.
FoodCache::FoodCache(base::Cancellable const & cancellable)
: CategoriesCache(ftypes::IsEatChecker::Instance(), cancellable)
{}
} // namespace search

View file

@ -0,0 +1,96 @@
#pragma once
#include "search/categories_set.hpp"
#include "search/cbv.hpp"
#include "indexer/mwm_set.hpp"
#include "base/cancellable.hpp"
#include <map>
#include <vector>
namespace search
{
class MwmContext;
// Per-mwm cache of the features that belong to a fixed set of categories
// (feature types). The set is defined at construction time.
class CategoriesCache
{
public:
// Collects the types from |source|, which must provide ForEachType().
// |cancellable| is stored by reference.
template <typename TypesSource>
CategoriesCache(TypesSource const & source, base::Cancellable const & cancellable) : m_cancellable(cancellable)
{
source.ForEachType([this](uint32_t type) { m_categories.Add(type); });
}
// Same as above, with the types given explicitly.
CategoriesCache(std::vector<uint32_t> const & types, base::Cancellable const & cancellable)
: m_cancellable(cancellable)
{
for (uint32_t type : types)
m_categories.Add(type);
}
virtual ~CategoriesCache() = default;
// Returns the matching features for the mwm behind |context|,
// loading them on the first request.
CBV Get(MwmContext const & context);
// Drops all cached results; the set of categories is kept.
inline void Clear() { m_cache.clear(); }
private:
// Performs the actual retrieval for one mwm.
CBV Load(MwmContext const & context) const;
CategoriesSet m_categories;
base::Cancellable const & m_cancellable;
std::map<MwmSet::MwmId, CBV> m_cache;
};
// CategoriesCache built from the types of ftypes::IsStreetOrSquareChecker.
class StreetsCache : public CategoriesCache
{
public:
StreetsCache(base::Cancellable const & cancellable);
};
// CategoriesCache built from the types of ftypes::IsSuburbChecker.
class SuburbsCache : public CategoriesCache
{
public:
SuburbsCache(base::Cancellable const & cancellable);
};
// CategoriesCache built from the types of ftypes::IsVillageChecker.
class VillagesCache : public CategoriesCache
{
public:
VillagesCache(base::Cancellable const & cancellable);
};
// CategoriesCache built from the types of ftypes::IsCountryChecker.
class CountriesCache : public CategoriesCache
{
public:
CountriesCache(base::Cancellable const & cancellable);
};
// CategoriesCache built from the types of ftypes::IsStateChecker.
class StatesCache : public CategoriesCache
{
public:
StatesCache(base::Cancellable const & cancellable);
};
// Used for cities/towns/villages from world. Currently we do not have villages in World.mwm but
// it may be good to put some important villages to it: mountain/beach resorts.
// CategoriesCache built from the types of ftypes::IsCityTownOrVillageChecker.
class CitiesTownsOrVillagesCache : public CategoriesCache
{
public:
CitiesTownsOrVillagesCache(base::Cancellable const & cancellable);
};
// CategoriesCache built from the types of ftypes::IsHotelChecker.
class HotelsCache : public CategoriesCache
{
public:
HotelsCache(base::Cancellable const & cancellable);
};
// CategoriesCache built from the types of ftypes::IsEatChecker.
class FoodCache : public CategoriesCache
{
public:
FoodCache(base::Cancellable const & cancellable);
};
} // namespace search

View file

@ -0,0 +1,34 @@
#pragma once
#include "indexer/classificator.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/macros.hpp"
#include "base/string_utils.hpp"
#include <algorithm>
#include <cstdint>
#include <unordered_set>
namespace search
{
// A non-copyable, non-movable set of feature types (categories).
class CategoriesSet
{
public:
CategoriesSet() : m_classificator(classif()) {}
// Adds |type| to the set; adding a type twice has no further effect.
inline void Add(uint32_t type) { m_categories.insert(type); }
// Calls |fn| for every type in the set. The set is unordered, so the
// order of the calls is unspecified.
template <typename Fn>
void ForEach(Fn && fn) const
{
std::for_each(m_categories.begin(), m_categories.end(), std::forward<Fn>(fn));
}
private:
// NOTE(review): initialized in the constructor but not read by any method
// visible in this class.
Classificator const & m_classificator;
std::unordered_set<uint32_t> m_categories;
DISALLOW_COPY_AND_MOVE(CategoriesSet);
};
} // namespace search

126
libs/search/cbv.cpp Normal file
View file

@ -0,0 +1,126 @@
#include "search/cbv.hpp"
#include <limits>
#include <vector>
using namespace std;
namespace search
{
namespace
{
// Modulus used by CBV::Hash(). The value equals 2^64 - 59
// (presumably chosen as the largest prime that fits into 64 bits).
uint64_t constexpr kModulo = 18446744073709551557LLU;
} // namespace
// static
// Returns a shared CBV in the "full" state. The object is a function-local
// static, created on the first call.
CBV const & CBV::GetFull()
{
static CBV const fullCBV(true /*full*/);
return fullCBV;
}
// Takes ownership of the bit vector |p|; the CBV is not in the "full" state.
CBV::CBV(unique_ptr<coding::CompressedBitVector> p) : m_p(std::move(p)) {}
// Move constructor: takes over the bit vector and the "full" flag of |cbv|
// and clears the flag of the source.
CBV::CBV(CBV && cbv) : m_p(std::move(cbv.m_p)), m_isFull(cbv.m_isFull)
{
cbv.m_isFull = false;
}
// Creates a CBV without a bit vector and with the "full" flag set to |full|.
CBV::CBV(bool full) : m_isFull(full) {}
// Replaces the contents with the bit vector |p| and leaves the "full" state.
CBV & CBV::operator=(unique_ptr<coding::CompressedBitVector> p)
{
m_p = std::move(p);
m_isFull = false;
return *this;
}
// Move assignment: takes over the bit vector and the "full" flag of |rhs|
// and clears the flag of the source. Self-assignment is a no-op.
CBV & CBV::operator=(CBV && rhs)
{
if (this == &rhs)
return *this;
m_p = std::move(rhs.m_p);
m_isFull = rhs.m_isFull;
rhs.m_isFull = false;
return *this;
}
// Switches to the "full" state: the bit vector is released, the flag is set.
void CBV::SetFull()
{
m_p.Reset();
m_isFull = true;
}
// Releases the bit vector and clears the "full" flag.
void CBV::Reset()
{
m_p.Reset();
m_isFull = false;
}
// Tells whether the bit |id| is set. A full CBV has every bit set and an
// empty one has none; only otherwise is the underlying vector consulted.
bool CBV::HasBit(uint64_t id) const
{
  return IsFull() || (!IsEmpty() && m_p->GetBit(id));
}
// Returns the number of set bits. Must not be called on a full CBV
// (asserted), since that state has no underlying vector to count.
uint64_t CBV::PopCount() const
{
  ASSERT(!IsFull(), ());
  return IsEmpty() ? 0 : m_p->PopCount();
}
// Returns the union of |*this| and |rhs|.
CBV CBV::Union(CBV const & rhs) const
{
// Full absorbs everything; an empty operand contributes nothing.
if (IsFull() || rhs.IsEmpty())
return *this;
if (IsEmpty() || rhs.IsFull())
return rhs;
// Here both operands are neither empty nor full.
return CBV(coding::CompressedBitVector::Union(*m_p, *rhs.m_p));
}
// Returns the intersection of |*this| and |rhs|.
CBV CBV::Intersect(CBV const & rhs) const
{
// Intersecting with full yields the other operand;
// intersecting with empty yields empty.
if (IsFull() || rhs.IsEmpty())
return rhs;
if (IsEmpty() || rhs.IsFull())
return *this;
// Here both operands are neither empty nor full.
return CBV(coding::CompressedBitVector::Intersect(*m_p, *rhs.m_p));
}
// Returns a CBV that keeps only the first |n| set bits of this one.
CBV CBV::Take(uint64_t n) const
{
if (IsEmpty())
return *this;
if (IsFull())
{
// A full CBV has no underlying vector, so one is built explicitly:
// ceil(n / 64) groups of 64 bits, all ones to begin with.
vector<uint64_t> groups(static_cast<size_t>((n + 63) / 64), numeric_limits<uint64_t>::max());
uint64_t const r = n % 64;
if (r != 0)
{
ASSERT(!groups.empty(), ());
// Keep only the low |r| bits of the last group, so that exactly |n| bits are set.
groups.back() = (static_cast<uint64_t>(1) << r) - 1;
}
return CBV(coding::DenseCBV::BuildFromBitGroups(std::move(groups)));
}
return CBV(m_p->LeaveFirstSetNBits(n));
}
// Returns a hash of the contents: 0 for an empty CBV, kModulo for a full
// one and a value reduced modulo kModulo otherwise. The reduced value is
// always below kModulo, so it never equals the hash of a full CBV.
uint64_t CBV::Hash() const
{
if (IsEmpty())
return 0;
if (IsFull())
return kModulo;
return coding::CompressedBitVectorHasher::Hash(*m_p) % kModulo;
}
} // namespace search

64
libs/search/cbv.hpp Normal file
View file

@ -0,0 +1,64 @@
#pragma once
#include "coding/compressed_bit_vector.hpp"
#include "base/ref_counted.hpp"
#include <cstdint>
#include <memory>
#include <utility>
namespace search
{
// A wrapper around coding::CompressedBitVector that augments the
// latter with the "full" state and uses reference counting for
// ownership sharing.
class CBV
{
public:
// Returns a shared instance in the "full" state.
static CBV const & GetFull();
// Creates an empty CBV.
CBV() = default;
// Takes ownership of |p|.
explicit CBV(std::unique_ptr<coding::CompressedBitVector> p);
// Copies share the underlying bit vector through the ref-counted pointer.
CBV(CBV const & cbv) = default;
CBV(CBV && cbv);
// True iff at least one bit is set, i.e. the CBV is not empty.
inline operator bool() const { return !IsEmpty(); }
CBV & operator=(std::unique_ptr<coding::CompressedBitVector> p);
CBV & operator=(CBV const & rhs) = default;
CBV & operator=(CBV && rhs);
// Switches to the "full" state and drops the underlying bit vector.
void SetFull();
// Switches to the empty state.
void Reset();
inline bool IsEmpty() const { return !m_isFull && coding::CompressedBitVector::IsEmpty(m_p.Get()); }
inline bool IsFull() const { return m_isFull; }
bool HasBit(uint64_t id) const;
// Number of set bits; must not be called on a full CBV.
uint64_t PopCount() const;
// Calls |fn| for every set bit; must not be called on a full CBV.
template <typename Fn>
void ForEach(Fn && fn) const
{
ASSERT(!m_isFull, ());
if (!IsEmpty())
coding::CompressedBitVectorEnumerator::ForEach(*m_p, std::forward<Fn>(fn));
}
CBV Union(CBV const & rhs) const;
CBV Intersect(CBV const & rhs) const;
// Takes first set |n| bits.
CBV Take(uint64_t n) const;
uint64_t Hash() const;
private:
// Used by GetFull() to create the shared "full" instance.
explicit CBV(bool full);
base::RefCountPtr<coding::CompressedBitVector> m_p;
// True iff all bits are set to one.
bool m_isFull = false;
};
} // namespace search

View file

@ -0,0 +1,139 @@
#include "search/cities_boundaries_table.hpp"
#include "search/categories_cache.hpp"
#include "search/localities_source.hpp"
#include "search/mwm_context.hpp"
#include "indexer/cities_boundaries_serdes.hpp"
#include "indexer/mwm_set.hpp"
#include "indexer/utils.hpp"
#include "coding/reader.hpp"
#include "base/assert.hpp"
#include "base/cancellable.hpp"
#include "base/checked_cast.hpp"
#include "base/logging.hpp"
#include <algorithm>
#include <sstream>
namespace search
{
using namespace indexer;
using namespace std;
// CitiesBoundariesTable::Boundaries ---------------------------------------------------------------
// Returns true iff at least one of the stored boundaries reports that it
// contains |p|, using |m_eps| as the tolerance passed to CityBoundary::HasPoint.
bool CitiesBoundariesTable::Boundaries::HasPoint(m2::PointD const & p) const
{
return any_of(m_boundaries.begin(), m_boundaries.end(), [&](CityBoundary const & b) { return b.HasPoint(p, m_eps); });
}
// Formats |boundaries| as "Boundaries [<boundaries>, eps: <eps>]".
std::string DebugPrint(CitiesBoundariesTable::Boundaries const & boundaries)
{
std::ostringstream os;
os << "Boundaries [";
os << ::DebugPrint(boundaries.m_boundaries) << ", ";
os << "eps: " << boundaries.m_eps;
os << "]";
return os.str();
}
// CitiesBoundariesTable ---------------------------------------------------------------------------
// Loads the table of cities boundaries from the World map.
//
// Returns true if the table is ready to use, either freshly loaded or
// already loaded from the same World file. Returns false if the World map
// or the boundaries section is missing, if the section can't be read, or if
// the number of stored boundaries does not match the number of localities.
// On failure the previously loaded table is left untouched.
bool CitiesBoundariesTable::Load()
{
  auto handle = FindWorld(m_dataSource);
  if (!handle.IsAlive())
  {
    LOG(LWARNING, ("Can't find World map file."));
    return false;
  }

  // Skip if table was already loaded from this file.
  if (handle.GetId() == m_mwmId)
    return true;

  MwmContext context(std::move(handle));
  base::Cancellable const cancellable;
  auto const localities = CategoriesCache(LocalitiesSource{}, cancellable).Get(context);

  auto const & cont = context.m_value.m_cont;

  if (!cont.IsExist(CITIES_BOUNDARIES_FILE_TAG))
  {
    LOG(LWARNING, ("No cities boundaries table in the world map."));
    return false;
  }

  vector<vector<CityBoundary>> all;
  // Initialized so that the variable never holds an indeterminate value,
  // whatever Deserialize does with its output parameter.
  double precision = 0.0;

  try
  {
    auto reader = cont.GetReader(CITIES_BOUNDARIES_FILE_TAG);
    ReaderSource<ReaderPtr<ModelReader>> source(reader);
    CitiesBoundariesSerDes::Deserialize(source, all, precision);
  }
  catch (Reader::Exception const & e)
  {
    LOG(LERROR, ("Can't read cities boundaries table from the world map:", e.Msg()));
    return false;
  }

  // The i-th stored entry corresponds to the i-th locality, so the counts must agree.
  if (all.size() != localities.PopCount())
  {
    LOG(LERROR, ("Wrong number of boundaries, expected:", localities.PopCount(), "actual:", all.size()));
    return false;
  }

  m_mwmId = context.GetId();
  m_table.clear();
  m_eps = precision;

  // Only localities that actually have boundaries get an entry in the table.
  size_t idx = 0, notEmpty = 0;
  localities.ForEach([&](uint64_t fid)
  {
    if (!all[idx].empty())
    {
      CHECK(m_table.emplace(base::asserted_cast<uint32_t>(fid), std::move(all[idx])).second, ());
      ++notEmpty;
    }
    ++idx;
  });

  LOG(LDEBUG, ("Localities count =", idx, "; with boundary =", notEmpty));
  return true;
}
// Looks up the boundaries of the feature |fid|. Returns false when |fid|
// belongs to an mwm other than the one the table was loaded from, or when
// the table has no entry for it; |bs| is filled only on success.
bool CitiesBoundariesTable::Get(FeatureID const & fid, Boundaries & bs) const
{
if (fid.m_mwmId != m_mwmId)
return false;
return Get(fid.m_index, bs);
}
// Looks up the boundaries by feature index |fid|. On success stores
// a copy of the boundaries together with the table's precision in |bs| and
// returns true; otherwise returns false and leaves |bs| untouched.
bool CitiesBoundariesTable::Get(uint32_t fid, Boundaries & bs) const
{
auto const it = m_table.find(fid);
if (it == m_table.end())
return false;
bs = Boundaries(it->second, m_eps);
return true;
}
// Replaces the contents of |featureIds| with the ids of all table entries
// that have at least one boundary whose bounding box intersects |rect|.
// Each id is reported once, in the iteration order of the table.
void GetCityBoundariesInRectForTesting(CitiesBoundariesTable const & table, m2::RectD const & rect,
                                       vector<uint32_t> & featureIds)
{
  featureIds.clear();

  auto const intersectsRect = [&rect](CityBoundary const & boundary)
  { return rect.IsIntersect(boundary.m_bbox.ToRect()); };

  for (auto const & entry : table.m_table)
  {
    auto const & boundaries = entry.second;
    if (any_of(boundaries.begin(), boundaries.end(), intersectsRect))
      featureIds.push_back(entry.first);
  }
}
} // namespace search

View file

@ -0,0 +1,86 @@
#pragma once
#include "indexer/city_boundary.hpp"
#include "indexer/feature_decl.hpp"
#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
#include <string>
#include <unordered_map>
#include <vector>
class DataSource;
namespace search
{
// Table of city boundaries keyed by feature index, loaded from the World map.
class CitiesBoundariesTable
{
friend void GetCityBoundariesInRectForTesting(CitiesBoundariesTable const &, m2::RectD const & rect,
std::vector<uint32_t> & featureIds);
public:
// The boundaries of a single city together with the precision
// they should be tested with.
class Boundaries
{
public:
Boundaries() = default;
// Stores a copy of |boundaries|.
Boundaries(std::vector<indexer::CityBoundary> const & boundaries, double eps) : m_boundaries(boundaries), m_eps(eps)
{}
// Returns true iff |p| is inside any of the regions bounded by
// |*this|.
bool HasPoint(m2::PointD const & p) const;
// Returns a rect extended with the min and max corners of the bounding
// box of every boundary.
m2::RectD GetLimitRect() const
{
m2::RectD rect;
for (auto const & boundary : m_boundaries)
{
rect.Add(boundary.m_bbox.Min());
rect.Add(boundary.m_bbox.Max());
}
return rect;
}
size_t GetCount() const { return m_boundaries.size(); }
// Calls |fn|(boundary, index) for every stored boundary, in order.
template <class FnT>
void ForEachBoundary(FnT && fn) const
{
for (size_t i = 0; i < m_boundaries.size(); ++i)
fn(m_boundaries[i], i);
}
friend std::string DebugPrint(Boundaries const & boundaries);
private:
std::vector<indexer::CityBoundary> m_boundaries;
double m_eps = 0.0;
};
// |dataSource| is stored by reference and must outlive the table.
explicit CitiesBoundariesTable(DataSource const & dataSource) : m_dataSource(dataSource) {}
// (Re)loads the table from the World map; returns false on failure.
bool Load();
// The FeatureID overloads also require the feature to come from the mwm
// the table was loaded from.
bool Has(FeatureID const & fid) const { return fid.m_mwmId == m_mwmId && Has(fid.m_index); }
bool Has(uint32_t fid) const { return m_table.find(fid) != m_table.end(); }
// On success fill |bs| and return true.
bool Get(FeatureID const & fid, Boundaries & bs) const;
bool Get(uint32_t fid, Boundaries & bs) const;
// Number of features that have boundaries in the table.
size_t GetSize() const { return m_table.size(); }
private:
DataSource const & m_dataSource;
// Id of the mwm the table was loaded from.
MwmSet::MwmId m_mwmId;
std::unordered_map<uint32_t, std::vector<indexer::CityBoundary>> m_table;
double m_eps = 0.0;
};
/// \brief Fills |featureIds| with feature ids of city boundaries if bounding rect of
/// the city boundary crosses |rect|.
/// \note This method is inefficient and is written for debug and test purposes only.
void GetCityBoundariesInRectForTesting(CitiesBoundariesTable const &, m2::RectD const & rect,
std::vector<uint32_t> & featureIds);
} // namespace search

View file

@ -0,0 +1,39 @@
#include "search/city_finder.hpp"
#include "indexer/feature_decl.hpp"
using namespace std;
namespace search
{
// Wires the locality finder to |dataSource|. The boundaries table and the
// villages cache exist only because the finder's constructor takes them.
// |m_cancellable| is default-initialized before the members below, since
// it is declared first in the class, and is handed to the cache.
CityFinder::CityFinder(DataSource const & dataSource)
: m_unusedBoundaries(dataSource)
, m_unusedCache(m_cancellable)
, m_finder(dataSource, m_unusedBoundaries, m_unusedCache)
{}
// Returns the name, obtained via GetSpecifiedOrDefaultName(|lang|), of the
// locality found for |p|. The result is an empty string if the callback
// was never invoked or left the name empty.
string CityFinder::GetCityName(m2::PointD const & p, int8_t lang)
{
string_view city;
m_finder.GetLocality(p, [&](LocalityItem const & item) { item.GetSpecifiedOrDefaultName(lang, city); });
// Return string, because m_finder.GetLocality() is not persistent.
return std::string(city);
}
// Returns the name, obtained via GetReadableName(), of the locality found
// for |p|. The result is an empty string if the callback was never invoked
// or left the name empty.
string CityFinder::GetCityReadableName(m2::PointD const & p)
{
string_view city;
m_finder.GetLocality(p, [&](LocalityItem const & item) { item.GetReadableName(city); });
// Return string, because m_finder.GetLocality() is not persistent.
return std::string(city);
}
// Returns the feature id of the locality found for |p|, or a
// default-constructed FeatureID if the callback was never invoked.
FeatureID CityFinder::GetCityFeatureID(m2::PointD const & p)
{
FeatureID id;
m_finder.GetLocality(p, [&id](LocalityItem const & item) { id = item.m_id; });
return id;
}
} // namespace search

View file

@ -0,0 +1,36 @@
#pragma once
#include "search/categories_cache.hpp"
#include "search/locality_finder.hpp"
#include "geometry/point2d.hpp"
#include "base/cancellable.hpp"
#include <cstdint>
#include <string>
class DataSource;
struct FeatureID;
namespace search
{
// Finds the city (locality) for a point, on top of search::LocalityFinder.
class CityFinder
{
public:
// TODO (@milchakov): consider to reuse locality finder from search
// engine. Otherwise, CityFinder won't benefit from approximated
// cities boundaries.
explicit CityFinder(DataSource const & dataSource);
// Name of the city at |p| in language |lang| or in the default language.
std::string GetCityName(m2::PointD const & p, int8_t lang);
std::string GetCityReadableName(m2::PointD const & p);
FeatureID GetCityFeatureID(m2::PointD const & p);
private:
// The declaration order matters: the constructor passes each of the
// first three members to the ones declared after it.
base::Cancellable m_cancellable;
search::CitiesBoundariesTable m_unusedBoundaries;
search::VillagesCache m_unusedCache;
search::LocalityFinder m_finder;
};
} // namespace search

31
libs/search/common.hpp Normal file
View file

@ -0,0 +1,31 @@
#pragma once
#include "indexer/categories_holder.hpp"
#include "base/buffer_vector.hpp"
#include "base/small_set.hpp"
#include "base/string_utils.hpp"
namespace search
{
// Full (non-prefix) tokens of a query. The prefix is stored separately.
// TODO(@m, @y): Find a way (similar to TokenSlice maybe?) to unify
// the prefix and non-prefix tokens.
using QueryTokens = buffer_vector<strings::UniString, 32>;
// Small set of locale codes, sized as the number of entries in
// CategoriesHolder::kLocaleMapping plus one.
using Locales = base::SafeSmallSet<CategoriesHolder::kLocaleMapping.size() + 1>;
/// Upper bound for max count of tokens for indexing and scoring.
size_t constexpr kMaxNumTokens = 32;
// NOTE(review): presumably the maximum number of suggestions produced per query -
// confirm against the use sites.
size_t constexpr kMaxNumSuggests = 5;
// A search query in its raw form together with its tokenized representation.
struct QueryString
{
std::string m_query; ///< raw UTF8 query string
QueryTokens m_tokens; ///< tokens split into UniChar strings (not including the last prefix)
strings::UniString m_prefix; ///< last prefix, or empty if the query ends with a separator
// True when the query has neither full tokens nor a prefix; |m_query| is not inspected.
bool IsEmpty() const { return m_tokens.empty() && m_prefix.empty(); }
};
} // namespace search

View file

@ -0,0 +1,96 @@
#include "search/cuisine_filter.hpp"
#include "indexer/cuisines.hpp"
#include "indexer/feature.hpp"
#include "indexer/feature_meta.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "platform/mwm_traits.hpp"
#include "base/assert.hpp"
#include "base/checked_cast.hpp"
#include <algorithm>
using namespace std;
namespace search
{
namespace cuisine_filter
{
// Description -------------------------------------------------------------------------------------
// Collects into |m_types| every type of |ft| that is accepted by
// ftypes::IsCuisineChecker, in the order ForEachType() reports them.
//
// |m_types| is a freshly default-constructed (empty) vector at this point, so no
// explicit clear() is needed before filling it.
Description::Description(FeatureType & ft)
{
  ft.ForEachType([this](uint32_t t)
  {
    if (ftypes::IsCuisineChecker::Instance()(t))
      m_types.push_back(t);
  });
}
// Creates a filter for the mwm |mwmId| that accepts features having at least one of |types|.
//
// |descriptions| is stored by reference and must outlive the filter. |types| is copied
// and sorted so that Matches() can binary-search it.
CuisineFilter::ScopedFilter::ScopedFilter(MwmSet::MwmId const & mwmId, Descriptions const & descriptions,
                                          vector<uint32_t> const & types)
  : m_mwmId(mwmId)
  , m_descriptions(descriptions)
  , m_types(types)
{
  sort(begin(m_types), end(m_types));
}
// Returns true when |fid| belongs to this filter's mwm and at least one cuisine type
// recorded for it is among the requested types.
//
// Both lookups are binary searches: |m_descriptions| is searched by feature index and
// |m_types| was sorted in the constructor.
bool CuisineFilter::ScopedFilter::Matches(FeatureID const & fid) const
{
  if (fid.m_mwmId != m_mwmId)
    return false;

  auto const featureIndex = fid.m_index;
  auto const found = lower_bound(m_descriptions.begin(), m_descriptions.end(), featureIndex,
                                 [](pair<uint32_t, Description> const & entry, uint32_t index)
                                 { return entry.first < index; });
  if (found == m_descriptions.end() || found->first != featureIndex)
    return false;

  auto const & featureTypes = found->second.m_types;
  return any_of(featureTypes.begin(), featureTypes.end(), [this](uint32_t type)
  {
    return binary_search(m_types.begin(), m_types.end(), type);
  });
}
// CuisineFilter ------------------------------------------------------------------------------------
// Keeps a reference to |food|; the cache must outlive this filter.
CuisineFilter::CuisineFilter(FoodCache & food) : m_food(food) {}
// Creates a filter bound to the mwm of |context| that accepts features with any of |types|.
// Returns a null pointer when |types| is empty, i.e. when there is nothing to filter by.
unique_ptr<CuisineFilter::ScopedFilter> CuisineFilter::MakeScopedFilter(MwmContext const & context,
                                                                        vector<uint32_t> const & types)
{
  if (!types.empty())
    return make_unique<ScopedFilter>(context.GetId(), GetDescriptions(context), types);
  return nullptr;
}
// Drops all cached per-mwm descriptions.
// NOTE: ScopedFilter stores a reference into |m_descriptions|, so filters created before
// this call must not be used afterwards.
void CuisineFilter::ClearCaches()
{
m_descriptions.clear();
}
// Returns the (feature index, cuisine description) list for the mwm of |context|,
// building it on first use and caching it in |m_descriptions| afterwards.
//
// NOTE(review): ScopedFilter::Matches() binary-searches the returned vector by feature
// index, so this relies on ForEach() visiting ids in ascending order - confirm.
CuisineFilter::Descriptions const & CuisineFilter::GetDescriptions(MwmContext const & context)
{
  auto const & mwmId = context.GetId();
  auto const it = m_descriptions.find(mwmId);
  if (it != m_descriptions.end())
    return it->second;

  // Query the food cache before inserting the map entry, so that nothing is cached
  // if the query fails.
  auto const food = m_food.Get(context);
  auto & descriptions = m_descriptions[mwmId];
  food.ForEach([&descriptions, &context](uint64_t bit)
  {
    auto const id = base::asserted_cast<uint32_t>(bit);
    auto ft = context.GetFeature(id);
    // Features that cannot be loaded are silently skipped.
    if (ft)
      descriptions.emplace_back(id, Description(*ft));
  });
  return descriptions;
}
} // namespace cuisine_filter
} // namespace search

View file

@ -0,0 +1,58 @@
#pragma once
#include "search/categories_cache.hpp"
#include "search/mwm_context.hpp"
#include "indexer/mwm_set.hpp"
#include <map>
#include <memory>
#include <utility>
#include <vector>
class FeatureType;
namespace search
{
namespace cuisine_filter
{
// Cuisine types of a single feature.
struct Description
{
Description() = default;
// Fills |m_types| with the types of |ft| accepted by ftypes::IsCuisineChecker.
Description(FeatureType & ft);
std::vector<uint32_t> m_types;
};
// Filters features by cuisine type, caching per-mwm cuisine descriptions.
class CuisineFilter
{
public:
// (feature index, description) pairs; looked up with a binary search by feature index.
using Descriptions = std::vector<std::pair<uint32_t, Description>>;
// A filter bound to a single mwm and a fixed set of requested types.
class ScopedFilter
{
public:
ScopedFilter(MwmSet::MwmId const & mwmId, Descriptions const & descriptions, std::vector<uint32_t> const & types);
// True when |fid| is from this mwm and has at least one of the requested types.
bool Matches(FeatureID const & fid) const;
private:
MwmSet::MwmId const m_mwmId;
// Refers to an entry owned by CuisineFilter::m_descriptions; it becomes dangling after
// CuisineFilter::ClearCaches().
Descriptions const & m_descriptions;
// Requested types, sorted in the constructor.
std::vector<uint32_t> m_types;
};
CuisineFilter(FoodCache & food);
// Returns a null pointer when |types| is empty.
std::unique_ptr<ScopedFilter> MakeScopedFilter(MwmContext const & context, std::vector<uint32_t> const & types);
void ClearCaches();
private:
// Returns the cached descriptions for the mwm of |context|, building them on first use.
Descriptions const & GetDescriptions(MwmContext const & context);
FoodCache & m_food;
std::map<MwmSet::MwmId, Descriptions> m_descriptions;
};
} // namespace cuisine_filter
} // namespace search

View file

@ -0,0 +1,28 @@
#include "search/displayed_categories.hpp"
#include "base/macros.hpp"
#include <algorithm>
namespace search
{
DisplayedCategories::DisplayedCategories(CategoriesHolder const & holder) : m_holder(holder)
{
m_keys = {"category_eat", "category_hotel", "category_food", "category_tourism",
"category_wifi", "category_transport", "category_fuel", "category_parking",
"category_shopping", "category_secondhand", "category_atm", "category_nightlife",
"category_children", "category_bank", "category_entertainment", "category_water",
"category_hospital", "category_pharmacy", "category_recycling", "category_rv",
"category_police", "category_toilet", "category_post"};
}
// Lets |modifier| edit the list of displayed category keys in place.
void DisplayedCategories::Modify(CategoriesModifier & modifier)
{
modifier.Modify(m_keys);
}
std::vector<std::string> const & DisplayedCategories::GetKeys() const
{
return m_keys;
}
} // namespace search

View file

@ -0,0 +1,57 @@
#pragma once
#include "indexer/categories_holder.hpp"
#include <string>
#include <vector>
namespace search
{
class CategoriesModifier;
// Holds the list of category keys shown on the categories search tab and gives access to
// their synonyms.
// *NOTE* This class is not thread-safe.
class DisplayedCategories
{
public:
using Keys = std::vector<std::string>;
// |holder| is stored by reference and must outlive this object.
DisplayedCategories(CategoriesHolder const & holder);
// Lets |modifier| edit the keys in place.
void Modify(CategoriesModifier & modifier);
// Returns a list of English names of displayed categories for the categories search tab.
// The list may be modified during the application runtime in order to support sponsored or
// featured categories. Keys may be used as parts of resources ids.
Keys const & GetKeys() const;
// Calls |fn| on each pair (synonym name, synonym locale) for the
// |key|. Does nothing when the holder has no group translations for "@" + |key|.
template <typename Fn>
void ForEachSynonym(std::string const & key, Fn && fn) const
{
auto const & translations = m_holder.GetGroupTranslations();
// Group translations are keyed by the category key prefixed with '@'.
auto const it = translations.find("@" + key);
if (it == translations.end())
return;
for (auto const & name : it->second)
fn(name.m_name, CategoriesHolder::MapIntegerToLocale(name.m_locale));
}
// True when CategoriesHolder maps |locale| to a supported locale code.
static bool IsLanguageSupported(std::string_view locale)
{
return CategoriesHolder::MapLocaleToInteger(locale) != CategoriesHolder::kUnsupportedLocaleCode;
}
private:
CategoriesHolder const & m_holder;
Keys m_keys;
};
// Interface for objects that customize the list of displayed categories;
// see DisplayedCategories::Modify().
class CategoriesModifier
{
public:
virtual ~CategoriesModifier() = default;
// Edits |keys| in place.
virtual void Modify(DisplayedCategories::Keys & keys) = 0;
};
} // namespace search

233
libs/search/doc_vec.cpp Normal file
View file

@ -0,0 +1,233 @@
#include "search/doc_vec.hpp"
#include <limits>
namespace search
{
using namespace std;
namespace
{
// Accumulates frequencies of equal tokens in |tfs|. Result is sorted
// by tokens.
//
// |tokens| is deliberately taken by value: the local copy is sorted and then consumed,
// each distinct token being moved (not copied) into |tfs|. |tfs| must be empty on entry.
void SortAndMerge(vector<strings::UniString> tokens, vector<TokenFrequencyPair> & tfs)
{
  ASSERT(tfs.empty(), ());
  sort(tokens.begin(), tokens.end());
  for (auto & token : tokens)
  {
    // After sorting, equal tokens are adjacent, so comparing with the last emitted
    // pair is enough to detect a repeat.
    if (tfs.empty() || tfs.back().m_token != token)
      tfs.emplace_back(std::move(token), 1 /* frequency */);
    else
      ++tfs.back().m_frequency;
  }
}
// Returns the tf-idf weight of a token: its frequency |tf| multiplied by its inverse
// document frequency |idf|.
double GetTfIdf(double tf, double idf)
{
  double const weight = tf * idf;
  return weight;
}
// Returns the tf-idf weight of |tf|, taking the idf of its token from |idfs|.
// |isPrefix| is forwarded to IdfMap::Get().
double GetWeightImpl(IdfMap & idfs, TokenFrequencyPair const & tf, bool isPrefix)
{
  auto const idf = idfs.Get(tf.m_token, isPrefix);
  return GetTfIdf(tf.m_frequency, idf);
}
// Returns the squared tf-idf weight of |tf|; see GetWeightImpl().
double GetSqrWeightImpl(IdfMap & idfs, TokenFrequencyPair const & tf, bool isPrefix)
{
  auto const weight = GetWeightImpl(idfs, tf, isPrefix);
  return weight * weight;
}
// Computes squared L2 norm of vector of tokens, i.e. the sum of the squared tf-idf
// weights of all (non-prefix) tokens in |tfs|.
double SqrL2(IdfMap & idfs, vector<TokenFrequencyPair> const & tfs)
{
  double sqrNorm = 0;
  for (auto const & tokenFrequency : tfs)
    sqrNorm += GetSqrWeightImpl(idfs, tokenFrequency, false /* isPrefix */);
  return sqrNorm;
}
// Computes squared L2 norm of vector of tokens + prefix token. The prefix, when present,
// contributes as a token with frequency 1 that is looked up in |idfs| as a prefix.
double SqrL2(IdfMap & idfs, vector<TokenFrequencyPair> const & tfs, optional<strings::UniString> const & prefix)
{
  auto const fullTokensSqrNorm = SqrL2(idfs, tfs);
  if (!prefix)
    return fullTokensSqrNorm;

  return fullTokensSqrNorm +
         GetSqrWeightImpl(idfs, TokenFrequencyPair(*prefix, 1 /* frequency */), true /* isPrefix */);
}
} // namespace
// TokenFrequencyPair ------------------------------------------------------------------------------
// Orders pairs by token first and by frequency second.
bool TokenFrequencyPair::operator<(TokenFrequencyPair const & rhs) const
{
  return m_token != rhs.m_token ? m_token < rhs.m_token : m_frequency < rhs.m_frequency;
}
// Exchanges the contents of |*this| and |rhs|.
void TokenFrequencyPair::Swap(TokenFrequencyPair & rhs)
{
  std::swap(m_frequency, rhs.m_frequency);
  m_token.swap(rhs.m_token);
}
// Formats |tf| for logs as "TokenFrequencyPair [<token>, <frequency>]".
string DebugPrint(TokenFrequencyPair const & tf)
{
ostringstream os;
os << "TokenFrequencyPair [" << DebugPrint(tf.m_token) << ", " << tf.m_frequency << "]";
return os.str();
}
// DocVec ------------------------------------------------------------------------------------------
// Builds the document vector from the tokens collected by |builder|: equal tokens are
// merged into (token, frequency) pairs sorted by token. SortAndMerge() takes its first
// argument by value, so |builder| is left untouched.
DocVec::DocVec(Builder const & builder)
{
SortAndMerge(builder.m_tokens, m_tfs);
}
// Returns the SQUARED L2 norm of the document vector (the sum of squared tf-idf weights);
// callers that need the norm itself take the square root, as QueryVec::Similarity() does.
double DocVec::Norm(IdfMap & idfs) const
{
return SqrL2(idfs, m_tfs);
}
// Returns the |i|-th distinct token; tokens are stored in sorted order.
// |i| must be less than GetNumTokens().
strings::UniString const & DocVec::GetToken(size_t i) const
{
ASSERT_LESS(i, m_tfs.size(), ());
return m_tfs[i].m_token;
}
// Returns the idf of the |i|-th token as reported by |idfs| for a full (non-prefix) token.
// |i| must be less than GetNumTokens().
double DocVec::GetIdf(IdfMap & idfs, size_t i) const
{
ASSERT_LESS(i, m_tfs.size(), ());
return idfs.Get(m_tfs[i].m_token, false /* isPrefix */);
}
// Returns the tf-idf weight (frequency * idf) of the |i|-th token.
// |i| must be less than GetNumTokens().
double DocVec::GetWeight(IdfMap & idfs, size_t i) const
{
ASSERT_LESS(i, m_tfs.size(), ());
return GetWeightImpl(idfs, m_tfs[i], false /* isPrefix */);
}
// QueryVec ----------------------------------------------------------------------------------------
// Builds the query vector from the full tokens and the optional prefix collected by
// |builder|. |idfs| is stored as a pointer and must outlive this object.
QueryVec::QueryVec(IdfMap & idfs, Builder const & builder) : m_idfs(&idfs), m_prefix(builder.m_prefix)
{
SortAndMerge(builder.m_tokens, m_tfs);
}
// Computes the cosine similarity between this query and the document |rhs|.
//
// Query token weights come from |*m_idfs|, document token weights from |docIdfs|.
// Two empty vectors are considered identical (1.0); an empty vector and a non-empty
// one are considered unrelated (0.0).
//
// When the query has a prefix token, the result is the maximum of:
//   * the similarity computed as if the prefix matched nothing in the document, and
//   * the best similarity obtained by completing the prefix to any document token that
//     starts with it.
double QueryVec::Similarity(IdfMap & docIdfs, DocVec const & rhs)
{
  size_t constexpr kInvalidIndex = numeric_limits<size_t>::max();

  if (Empty() && rhs.Empty())
    return 1.0;
  if (Empty() || rhs.Empty())
    return 0.0;

  // rsMatchTo[j] is the index of the full query token equal to the |j|-th document
  // token, or kInvalidIndex when there is none.
  vector<size_t> rsMatchTo(rhs.GetNumTokens(), kInvalidIndex);

  // Dot product over full tokens. Both token lists are sorted, so a single merge-like
  // pass finds all equal pairs.
  double dot = 0;
  {
    size_t i = 0, j = 0;
    while (i < m_tfs.size() && j < rhs.GetNumTokens())
    {
      auto const & lt = m_tfs[i].m_token;
      auto const & rt = rhs.GetToken(j);
      if (lt < rt)
      {
        ++i;
      }
      else if (lt > rt)
      {
        ++j;
      }
      else
      {
        dot += GetFullTokenWeight(i) * rhs.GetWeight(docIdfs, j);
        rsMatchTo[j] = i;
        ++i;
        ++j;
      }
    }
  }

  // Both values are SQUARED norms, hence the square roots below.
  auto const ln = Norm();
  auto const rn = rhs.Norm(docIdfs);

  // This similarity metric assumes that prefix is not matched in the document.
  double const similarityNoPrefix = ln > 0 && rn > 0 ? dot / sqrt(ln) / sqrt(rn) : 0;
  if (!m_prefix)
    return similarityNoPrefix;

  double similarityWithPrefix = 0;
  auto const & prefix = *m_prefix;

  // Let's try to match prefix token with all tokens in the
  // document, and compute the best cosine similarity.
  for (size_t j = 0; j < rhs.GetNumTokens(); ++j)
  {
    auto const & t = rhs.GetToken(j);
    if (!strings::StartsWith(t.begin(), t.end(), prefix.begin(), prefix.end()))
      continue;

    auto const i = rsMatchTo[j];
    double num = 0;
    double denom = 0;
    if (i == kInvalidIndex)
    {
      // If this document token is not matched with full tokens in a
      // query, we need to update its weight in the cosine distance
      // - so we need to update correspondingly dot product and
      // vector norms of query and doc.
      auto const oldW = GetPrefixTokenWeight();
      auto const newW = GetTfIdf(1 /* frequency */, rhs.GetIdf(docIdfs, j));
      auto const l = max(0.0, ln - oldW * oldW + newW * newW);
      num = dot + newW * rhs.GetWeight(docIdfs, j);
      denom = sqrt(l) * sqrt(rn);
    }
    else
    {
      // If this document token is already matched with |i|-th full
      // token in a query - we know that completion of the prefix
      // token is the |i|-th query token. So we need to update
      // correspondingly dot product and vector norm of the query.
      auto const oldFW = GetFullTokenWeight(i);
      auto const oldPW = GetPrefixTokenWeight();
      auto const tf = m_tfs[i].m_frequency + 1;
      auto const idf = m_idfs->Get(m_tfs[i].m_token, false /* isPrefix */);
      auto const newW = GetTfIdf(tf, idf);
      // Clamped like in the branch above: rounding errors must not produce a negative
      // squared norm, which would turn into NaN under sqrt().
      auto const l = max(0.0, ln - oldFW * oldFW - oldPW * oldPW + newW * newW);
      num = dot + (newW - oldFW) * rhs.GetWeight(docIdfs, j);
      denom = sqrt(l) * sqrt(rn);
    }

    if (denom > 0)
      similarityWithPrefix = max(similarityWithPrefix, num / denom);
  }

  return max(similarityWithPrefix, similarityNoPrefix);
}
// Returns the SQUARED L2 norm of the query vector, including the prefix token when
// present; callers that need the norm itself take the square root.
double QueryVec::Norm()
{
return SqrL2(*m_idfs, m_tfs, m_prefix);
}
// Returns the tf-idf weight of the |i|-th full (non-prefix) query token.
// |i| must be less than the number of full tokens.
double QueryVec::GetFullTokenWeight(size_t i)
{
ASSERT_LESS(i, m_tfs.size(), ());
return GetWeightImpl(*m_idfs, m_tfs[i], false /* isPrefix */);
}
// Returns the tf-idf weight of the prefix token, treated as a token with frequency 1.
// Must be called only when the query has a prefix.
double QueryVec::GetPrefixTokenWeight()
{
ASSERT(m_prefix, ());
return GetWeightImpl(*m_idfs, TokenFrequencyPair(*m_prefix, 1 /* frequency */), true /* isPrefix */);
}
} // namespace search

134
libs/search/doc_vec.hpp Normal file
View file

@ -0,0 +1,134 @@
#pragma once
#include "search/idf_map.hpp"
#include "base/assert.hpp"
#include "base/string_utils.hpp"
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <sstream>
#include <string>
#include <utility>
namespace search
{
class IdfMap;
// A token together with the number of times it occurs in a document or a query.
struct TokenFrequencyPair
{
TokenFrequencyPair() = default;
// |token| is perfect-forwarded into |m_token|, so an rvalue argument is moved and an
// lvalue argument is copied.
template <typename Token>
TokenFrequencyPair(Token && token, uint64_t frequency) : m_token(std::forward<Token>(token))
, m_frequency(frequency)
{}
// Orders by token first, then by frequency.
bool operator<(TokenFrequencyPair const & rhs) const;
void Swap(TokenFrequencyPair & rhs);
strings::UniString m_token;
uint64_t m_frequency = 0;
};
std::string DebugPrint(TokenFrequencyPair const & tf);
// This class represents a document in a vector space of tokens.
// Tokens are stored as (token, frequency) pairs sorted by token.
class DocVec
{
public:
// Collects the tokens of a document; duplicates are allowed and are merged when the
// DocVec is constructed.
class Builder
{
public:
template <typename Token>
void Add(Token && token)
{
m_tokens.emplace_back(std::forward<Token>(token));
}
private:
friend class DocVec;
std::vector<strings::UniString> m_tokens;
};
DocVec() = default;
explicit DocVec(Builder const & builder);
// Computes the SQUARED L2 norm of the doc (sum of squared tf-idf weights); take the
// square root to get the norm itself.
double Norm(IdfMap & idfs) const;
// Number of distinct tokens.
size_t GetNumTokens() const { return m_tfs.size(); }
// The accessors below require |i| < GetNumTokens().
strings::UniString const & GetToken(size_t i) const;
double GetIdf(IdfMap & idfs, size_t i) const;
double GetWeight(IdfMap & idfs, size_t i) const;
bool Empty() const { return m_tfs.empty(); }
private:
friend std::string DebugPrint(DocVec const & dv) { return "DocVec " + ::DebugPrint(dv.m_tfs); }
std::vector<TokenFrequencyPair> m_tfs;
};
// This class represents a search query in a vector space of tokens.
// Full tokens are stored as (token, frequency) pairs sorted by token; the optional
// prefix token is kept separately.
class QueryVec
{
public:
// Collects the full tokens and the optional prefix token of a query.
class Builder
{
public:
template <typename Token>
void AddFull(Token && token)
{
m_tokens.emplace_back(std::forward<Token>(token));
}
// Replaces any previously set prefix.
template <typename Token>
void SetPrefix(Token && token)
{
m_prefix = std::forward<Token>(token);
}
private:
friend class QueryVec;
std::vector<strings::UniString> m_tokens;
std::optional<strings::UniString> m_prefix;
};
// |idfs| is stored as a pointer and must outlive this object.
explicit QueryVec(IdfMap & idfs) : m_idfs(&idfs) {}
QueryVec(IdfMap & idfs, Builder const & builder);
// Computes cosine similarity between |*this| and |rhs|.
double Similarity(IdfMap & docIdfs, DocVec const & rhs);
// Computes the SQUARED L2 norm of the query, including the prefix token when present;
// take the square root to get the norm itself.
double Norm();
// True when the query has neither full tokens nor a prefix.
bool Empty() const { return m_tfs.empty() && !m_prefix; }
private:
double GetFullTokenWeight(size_t i);
// Must be called only when |m_prefix| is set.
double GetPrefixTokenWeight();
friend std::string DebugPrint(QueryVec const & qv)
{
std::ostringstream os;
os << "QueryVec " + ::DebugPrint(qv.m_tfs);
if (qv.m_prefix)
os << " " << DebugPrint(*qv.m_prefix);
return os.str();
}
IdfMap * m_idfs;
std::vector<TokenFrequencyPair> m_tfs;
std::optional<strings::UniString> m_prefix;
};
} // namespace search

View file

@ -0,0 +1,118 @@
#include "search/downloader_search_callback.hpp"
#include "search/result.hpp"
#include "editor/editable_data_source.hpp"
#include "indexer/data_source.hpp"
#include "storage/country_info_getter.hpp"
#include "storage/storage.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include <set>
#include <string>
#include <utility>
namespace
{
/// Tries to resolve the name of |ft| to an inner node (group) of the countries tree.
///
/// The English, default and international names are tried in that order. A name is
/// accepted when it is an inner node itself, or when it is a synonym of one.
///
/// @param[out] name  The resolved inner node id on success. On failure it holds the last
///                   feature name that was tried and must not be relied upon.
/// @return true when an inner node was found.
///
/// @todo Can't change on string_view now, because of unordered_map<string> synonyms.
bool GetGroupCountryIdFromFeature(storage::Storage const & storage, FeatureType & ft, std::string & name)
{
  auto const & synonyms = storage.GetCountryNameSynonyms();
  int8_t const langIndices[] = {StringUtf8Multilang::kEnglishCode, StringUtf8Multilang::kDefaultCode,
                                StringUtf8Multilang::kInternationalCode};

  for (auto const lang : langIndices)
  {
    name = ft.GetName(lang);
    if (name.empty())
      continue;

    if (storage.IsInnerNode(name))
      return true;

    auto const synonym = synonyms.find(name);
    if (synonym != synonyms.end() && storage.IsInnerNode(synonym->second))
    {
      name = synonym->second;
      return true;
    }
  }

  return false;
}
} // namespace
namespace search
{
// Stores references to the collaborators (all of them must outlive the callback) and
// takes ownership of |params|.
DownloaderSearchCallback::DownloaderSearchCallback(Delegate & delegate, DataSource const & dataSource,
storage::CountryInfoGetter const & infoGetter,
storage::Storage const & storage,
storage::DownloaderSearchParams params)
: m_delegate(delegate)
, m_dataSource(dataSource)
, m_infoGetter(infoGetter)
, m_storage(storage)
, m_params(std::move(params))
{}
// Converts generic search |results| into downloader results and passes them to the
// client callback through the delegate's UI task runner.
//
// For every result that has a point:
//   * if it is a feature that is a country or a state and its name resolves to a group
//     of the countries tree, that group is reported;
//   * otherwise the country that contains the result's center is reported, if any.
// Equal downloader results are reported only once, in the order of first occurrence.
void DownloaderSearchCallback::operator()(search::Results const & results)
{
  storage::DownloaderSearchResults downloaderSearchResults;
  std::set<storage::DownloaderSearchResult> uniqueResults;

  // Appends |downloaderResult| to the output unless an equal result was already added.
  // The outcome of insert() tells whether the element is new, so one set lookup suffices.
  auto const addIfUnique = [&uniqueResults, &downloaderSearchResults](
                               storage::DownloaderSearchResult const & downloaderResult)
  {
    if (uniqueResults.insert(downloaderResult).second)
      downloaderSearchResults.m_results.push_back(downloaderResult);
  };

  for (auto const & result : results)
  {
    if (!result.HasPoint())
      continue;

    if (result.GetResultType() != search::Result::Type::LatLon)
    {
      FeatureID const & fid = result.GetFeatureID();
      FeaturesLoaderGuard loader(m_dataSource, fid.m_mwmId);
      auto ft = loader.GetFeatureByIndex(fid.m_index);
      if (!ft)
      {
        LOG(LERROR, ("Feature can't be loaded:", fid));
        continue;
      }

      ftypes::LocalityType const type = ftypes::IsLocalityChecker::Instance().GetType(*ft);
      if (type == ftypes::LocalityType::Country || type == ftypes::LocalityType::State)
      {
        std::string groupFeatureName;
        if (GetGroupCountryIdFromFeature(m_storage, *ft, groupFeatureName))
        {
          addIfUnique(storage::DownloaderSearchResult(groupFeatureName, result.GetString() /* m_matchedName */));
          continue;
        }
        // Otherwise fall through to the lookup by the result's center below.
      }
    }

    auto const & mercator = result.GetFeatureCenter();
    storage::CountryId const & countryId = m_infoGetter.GetRegionCountryId(mercator);
    if (countryId == storage::kInvalidCountryId)
      continue;

    addIfUnique(storage::DownloaderSearchResult(countryId, result.GetString() /* m_matchedName */));
  }

  downloaderSearchResults.m_query = m_params.m_query;
  downloaderSearchResults.m_endMarker = results.IsEndMarker();

  // The callback and the results are captured by value, so the task does not refer to
  // |*this| or to any local of this function when it runs later.
  m_delegate.RunUITask([onResults = m_params.m_onResults, results = std::move(downloaderSearchResults)]() mutable
  { onResults(std::move(results)); });
}
} // namespace search

View file

@ -0,0 +1,46 @@
#pragma once
#include "storage/downloader_search_params.hpp"
#include <functional>
class DataSource;
namespace storage
{
class CountryInfoGetter;
class Storage;
} // namespace storage
namespace search
{
class Results;
// An on-results callback that should be used for the search in downloader.
// It converts generic search results into downloader results (country or group ids with
// the matched names) and delivers them through Delegate::RunUITask().
//
// *NOTE* the class is NOT thread safe.
class DownloaderSearchCallback
{
public:
// Runs tasks for the callback; the converted results are delivered through RunUITask().
class Delegate
{
public:
virtual ~Delegate() = default;
virtual void RunUITask(std::function<void()> fn) = 0;
};
// All reference arguments are stored as references and must outlive the callback.
DownloaderSearchCallback(Delegate & delegate, DataSource const & dataSource,
storage::CountryInfoGetter const & infoGetter, storage::Storage const & storage,
storage::DownloaderSearchParams params);
void operator()(search::Results const & results);
private:
Delegate & m_delegate;
DataSource const & m_dataSource;
storage::CountryInfoGetter const & m_infoGetter;
storage::Storage const & m_storage;
storage::DownloaderSearchParams m_params;
};
} // namespace search

View file

@ -0,0 +1,28 @@
#include "search/dummy_rank_table.hpp"
#include "base/macros.hpp"
namespace search
{
// Reports kNoRank for every index; the argument is ignored.
uint8_t DummyRankTable::Get(uint64_t /* i */) const
{
return kNoRank;
}
// Not supported by the dummy table. The returned 0 is a placeholder after
// NOTIMPLEMENTED(), not a meaningful size.
uint64_t DummyRankTable::Size() const
{
NOTIMPLEMENTED();
return 0;
}
// Not supported by the dummy table. VERSION_COUNT is returned as a placeholder after
// NOTIMPLEMENTED().
RankTable::Version DummyRankTable::GetVersion() const
{
NOTIMPLEMENTED();
return RankTable::VERSION_COUNT;
}
// Not supported: the dummy table should not be serialized. Nothing is written.
void DummyRankTable::Serialize(Writer &)
{
NOTIMPLEMENTED();
}
} // namespace search

View file

@ -0,0 +1,21 @@
#pragma once
#include "indexer/rank_table.hpp"
#include <cstdint>
namespace search
{
// This dummy rank table is used instead of a normal rank table when
// the latter can't be loaded. It should not be serialized and can't
// be loaded.
// Get() reports kNoRank for every index; the other overrides are not implemented.
class DummyRankTable : public RankTable
{
public:
// RankTable overrides:
uint8_t Get(uint64_t i) const override;
uint64_t Size() const override;
Version GetVersion() const override;
void Serialize(Writer &) override;
};
} // namespace search

View file

@ -0,0 +1,44 @@
#include "search/editor_delegate.hpp"
#include "search/reverse_geocoder.hpp"
#include "editor/editable_data_source.hpp"
#include "indexer/data_source_helpers.hpp"
#include "indexer/feature_decl.hpp"
using namespace std;
namespace search
{
// Keeps a reference to |dataSource|, which must outlive the delegate.
EditorDelegate::EditorDelegate(DataSource const & dataSource) : m_dataSource(dataSource) {}
// Returns the id that the data source reports for the country file named |name|.
MwmSet::MwmId EditorDelegate::GetMwmIdByMapName(string const & name) const
{
  platform::CountryFile const countryFile(name);
  return m_dataSource.GetMwmIdByCountryFile(countryFile);
}
// Loads the original feature |fid| (via GetOriginalFeatureByIndex()) and converts it to
// an editable map object. Returns a null pointer when the feature cannot be loaded.
unique_ptr<osm::EditableMapObject> EditorDelegate::GetOriginalMapObject(FeatureID const & fid) const
{
  FeaturesLoaderGuard loader(m_dataSource, fid.m_mwmId);
  auto original = loader.GetOriginalFeatureByIndex(fid.m_index);
  if (!original)
    return nullptr;

  auto mapObject = make_unique<osm::EditableMapObject>();
  mapObject->SetFromFeatureType(*original);
  return mapObject;
}
// Returns the street name that ReverseGeocoder reports for the original feature |fid|.
string EditorDelegate::GetOriginalFeatureStreet(FeatureID const & fid) const
{
  search::ReverseGeocoder const geocoder(m_dataSource);
  return geocoder.GetOriginalFeatureStreetName(fid);
}
// Calls |fn| for the features that indexer::ForEachFeatureAtPoint() finds at |point|,
// using a fixed tolerance of 1e-2 (a centimetre, if the value is in metres as the
// constant's name says).
void EditorDelegate::ForEachFeatureAtPoint(osm::Editor::FeatureTypeFn && fn, m2::PointD const & point) const
{
  double constexpr kToleranceMeters = 1e-2;
  indexer::ForEachFeatureAtPoint(m_dataSource, std::move(fn), point, kToleranceMeters);
}
} // namespace search

View file

@ -0,0 +1,28 @@
#pragma once
#include "editor/osm_editor.hpp"
#include "indexer/editable_map_object.hpp"
#include <memory>
#include <string>
class DataSource;
namespace search
{
// Implementation of osm::Editor::Delegate that answers the editor's queries using a
// DataSource and the search library's ReverseGeocoder.
class EditorDelegate : public osm::Editor::Delegate
{
public:
// |dataSource| is stored by reference and must outlive the delegate.
EditorDelegate(DataSource const & dataSource);
// osm::Editor::Delegate overrides:
MwmSet::MwmId GetMwmIdByMapName(std::string const & name) const override;
// Returns a null pointer when the original feature cannot be loaded.
std::unique_ptr<osm::EditableMapObject> GetOriginalMapObject(FeatureID const & fid) const override;
std::string GetOriginalFeatureStreet(FeatureID const & fid) const override;
void ForEachFeatureAtPoint(osm::Editor::FeatureTypeFn && fn, m2::PointD const & point) const override;
private:
DataSource const & m_dataSource;
};
} // namespace search

61
libs/search/emitter.hpp Normal file
View file

@ -0,0 +1,61 @@
#pragma once
#include "search/result.hpp"
#include "search/search_params.hpp"
#include "base/logging.hpp"
#include "base/timer.hpp"
#include <string>
#include <vector>
namespace search
{
namespace bookmarks
{
struct Result;
}
// Accumulates the results of a single search and hands them to the client callback
// in batches.
class Emitter
{
public:
  // Starts a new search: installs |onResults| as the callback, drops the results
  // accumulated so far and restarts the timer used in log messages.
  void Init(SearchParams::OnResults onResults)
  {
    m_onResults = std::move(onResults);
    m_results.Clear();
    m_prevEmitSize = 0;
    m_timer.Reset();
  }

  bool AddResult(Result && res) { return m_results.AddResult(std::move(res)); }

  void AddResultNoChecks(Result && res) { m_results.AddResultNoChecks(std::move(res)); }

  void AddBookmarkResult(bookmarks::Result const & result) { m_results.AddBookmarkResult(result); }

  // Passes all accumulated results to the callback. Unless |force| is set, nothing
  // happens when the number of results has not changed since the previous emit.
  void Emit(bool force = false)
  {
    auto const totalCount = m_results.GetCount();
    bool const countUnchanged = m_prevEmitSize == totalCount;
    if (countUnchanged && !force)
      return;

    LOG(LINFO, ("Emitting a new batch of results:", totalCount - m_prevEmitSize, ",", m_timer.ElapsedMilliseconds(),
                "ms since the search has started."));
    m_prevEmitSize = totalCount;
    m_onResults(m_results);
  }

  Results const & GetResults() const { return m_results; }

  // Marks the results as final (passing |cancelled| to Results::SetEndMarker()) and
  // emits them unconditionally.
  void Finish(bool cancelled)
  {
    m_results.SetEndMarker(cancelled);
    Emit(true /* force */);
  }

private:
  SearchParams::OnResults m_onResults;
  Results m_results;
  // Number of results at the moment of the previous emit.
  size_t m_prevEmitSize = 0;
  base::Timer m_timer;
};
} // namespace search

274
libs/search/engine.cpp Normal file
View file

@ -0,0 +1,274 @@
#include "search/engine.hpp"
#include "search/processor.hpp"
#include "storage/country_info_getter.hpp"
#include "indexer/categories_holder.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/scope_guard.hpp"
#include "base/timer.hpp"
#include <algorithm>
#include <map>
#include <vector>
namespace search
{
using namespace std;
namespace
{
// Functor that collects suggestions from category names: for every (normalized name,
// locale) pair it keeps the smallest non-empty prefix length to suggest.
class InitSuggestions
{
  // (normalized name, locale) -> prefix length. A value of 0 means "not set yet": it is
  // what operator[] default-inserts.
  map<pair<strings::UniString, int8_t>, uint8_t> m_suggests;

public:
  void operator()(CategoriesHolder::Category::Name const & name)
  {
    if (name.m_prefixLengthToSuggest == CategoriesHolder::Category::kEmptyPrefixLength)
      return;

    strings::UniString const uniName = NormalizeAndSimplifyString(name.m_name);
    uint8_t & prefixLength = m_suggests[make_pair(uniName, name.m_locale)];
    if (prefixLength == 0 || prefixLength > name.m_prefixLengthToSuggest)
      prefixLength = name.m_prefixLengthToSuggest;
  }

  // Appends all collected suggestions to |suggests|.
  void GetSuggests(vector<Suggest> & suggests) const
  {
    suggests.reserve(suggests.size() + m_suggests.size());
    for (auto const & [nameAndLocale, prefixLength] : m_suggests)
      suggests.emplace_back(nameAndLocale.first, prefixLength, nameAndLocale.second);
  }
};
} // namespace
// ProcessorHandle----------------------------------------------------------------------------------
// A new handle is attached to no processor and is not cancelled.
ProcessorHandle::ProcessorHandle() : m_processor(nullptr), m_cancelled(false) {}
void ProcessorHandle::Cancel()
{
lock_guard<mutex> lock(m_mu);
m_cancelled = true;
if (m_processor)
m_processor->Cancel();
}
// Binds the handle to |processor|. When Cancel() was called before the processor was
// attached, the pending cancel request is delivered immediately.
void ProcessorHandle::Attach(Processor & processor)
{
  lock_guard<mutex> guard(m_mu);
  m_processor = &processor;
  if (!m_cancelled)
    return;
  processor.Cancel();
}
void ProcessorHandle::Detach()
{
lock_guard<mutex> lock(m_mu);
m_processor = nullptr;
}
// Engine::Params ----------------------------------------------------------------------------------
// Defaults: the "en" locale and a single search thread.
Engine::Params::Params() : m_locale("en"), m_numThreads(1) {}
// |locale| is set as the preferred locale of every processor; |numThreads| is the number
// of search threads (and processors) the engine creates.
Engine::Params::Params(string const & locale, size_t numThreads) : m_locale(locale), m_numThreads(numThreads) {}
// Engine ------------------------------------------------------------------------------------------
// Creates the search engine:
//   1. collects suggestions from the names in |categories|;
//   2. creates one context with its own Processor per requested thread;
//   3. starts the worker threads, each running MainLoop() on its own context;
//   4. posts the initial broadcast tasks (localities cache, cities boundaries and
//      countries tree) to all processors.
// All contexts are fully set up before the first thread is started.
Engine::Engine(DataSource & dataSource, CategoriesHolder const & categories,
               storage::CountryInfoGetter const & infoGetter, Params const & params)
  : m_shutdown(false)
{
  InitSuggestions suggestsCollector;
  categories.ForEachName(suggestsCollector);
  suggestsCollector.GetSuggests(m_suggests);

  auto const numThreads = params.m_numThreads;

  m_contexts.resize(numThreads);
  for (auto & context : m_contexts)
  {
    auto processor = make_unique<Processor>(dataSource, categories, m_suggests, infoGetter);
    processor->SetPreferredLocale(params.m_locale);
    context.m_processor = std::move(processor);
  }

  m_threads.reserve(numThreads);
  for (auto & context : m_contexts)
    m_threads.emplace_back(&Engine::MainLoop, this, ref(context));

  CacheWorldLocalities();
  LoadCitiesBoundaries();
  LoadCountriesTree();
}
// Stops the worker threads and waits for them to finish.
// NOTE: MainLoop() exits as soon as it observes |m_shutdown|, so messages that are still
// queued at that moment are not executed.
Engine::~Engine()
{
{
// The flag is changed under the mutex that the workers' condition-variable wait uses,
// so a worker cannot miss the wake-up.
lock_guard<mutex> lock(m_mu);
m_shutdown = true;
m_cv.notify_all();
}
for (auto & thread : m_threads)
thread.join();
}
// Posts a search request described by |params| to the engine's queue and returns a weak
// handle that can be used to cancel it.
//
// The handle is co-owned by the queued task only, so the returned weak pointer expires
// once the task has been executed (or dropped) and destroyed.
weak_ptr<ProcessorHandle> Engine::Search(SearchParams params)
{
  auto handle = make_shared<ProcessorHandle>();
  // The lambda is mutable so that the captured |params| can really be moved into
  // DoSearch(); in a non-mutable lambda the capture is const and std::move() would
  // silently fall back to a copy. A task message is invoked once, so moving is safe.
  PostMessage(Message::TYPE_TASK, [this, params = std::move(params), handle](Processor & processor) mutable
  { DoSearch(std::move(params), handle, processor); });
  return handle;
}
// Broadcasts the new preferred |locale| to every processor. The string is copied into
// the task because the task runs asynchronously on the search threads.
void Engine::SetLocale(string const & locale)
{
  PostMessage(Message::TYPE_BROADCAST, [locale](Processor & p) { p.SetPreferredLocale(locale); });
}
// Returns the number of worker threads created in the constructor.
size_t Engine::GetNumThreads() const
{
return m_threads.size();
}
void Engine::ClearCaches()
{
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.ClearCaches(); });
}
void Engine::CacheWorldLocalities()
{
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.CacheWorldLocalities(); });
}
void Engine::LoadCitiesBoundaries()
{
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.LoadCitiesBoundaries(); });
}
void Engine::LoadCountriesTree()
{
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.LoadCountriesTree(); });
}
void Engine::EnableIndexingOfBookmarksDescriptions(bool enable)
{
PostMessage(Message::TYPE_BROADCAST,
[enable](Processor & processor) { processor.EnableIndexingOfBookmarksDescriptions(enable); });
}
// Broadcasts to every processor whether the bookmark group |groupId| should be indexed.
// Both arguments are captured by value, explicitly, like in the sibling broadcast
// methods: the task runs asynchronously on the search threads.
void Engine::EnableIndexingOfBookmarkGroup(bookmarks::GroupId const & groupId, bool enable)
{
  PostMessage(Message::TYPE_BROADCAST,
              [groupId, enable](Processor & processor) { processor.EnableIndexingOfBookmarkGroup(groupId, enable); });
}
void Engine::ResetBookmarks()
{
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.ResetBookmarks(); });
}
// Broadcasts newly created bookmarks to every processor. |marks| is copied into the task
// because the task runs asynchronously on the search threads.
void Engine::OnBookmarksCreated(vector<pair<bookmarks::Id, bookmarks::Doc>> const & marks)
{
  PostMessage(Message::TYPE_BROADCAST, [marks](Processor & p) { p.OnBookmarksCreated(marks); });
}
// Broadcasts updated bookmarks to every processor. |marks| is copied into the task
// because the task runs asynchronously on the search threads.
void Engine::OnBookmarksUpdated(vector<pair<bookmarks::Id, bookmarks::Doc>> const & marks)
{
  PostMessage(Message::TYPE_BROADCAST, [marks](Processor & p) { p.OnBookmarksUpdated(marks); });
}
// Broadcasts the ids of deleted bookmarks to every processor. |marks| is copied into the
// task because the task runs asynchronously on the search threads.
void Engine::OnBookmarksDeleted(vector<bookmarks::Id> const & marks)
{
  PostMessage(Message::TYPE_BROADCAST, [marks](Processor & p) { p.OnBookmarksDeleted(marks); });
}
// Broadcasts to every processor that |marks| were attached to the group |groupId|.
// Both arguments are copied into the task because it runs asynchronously.
void Engine::OnBookmarksAttachedToGroup(bookmarks::GroupId const & groupId, vector<bookmarks::Id> const & marks)
{
  PostMessage(Message::TYPE_BROADCAST,
              [groupId, marks](Processor & p) { p.OnBookmarksAttachedToGroup(groupId, marks); });
}
// Broadcasts to every processor that |marks| were detached from the group |groupId|.
// Both arguments are copied into the task because it runs asynchronously.
void Engine::OnBookmarksDetachedFromGroup(bookmarks::GroupId const & groupId, vector<bookmarks::Id> const & marks)
{
  PostMessage(Message::TYPE_BROADCAST,
              [groupId, marks](Processor & p) { p.OnBookmarksDetachedFromGroup(groupId, marks); });
}
// Body of a worker thread. Repeatedly takes messages addressed to |context| (replicated
// broadcasts plus at most one task from the global queue) and executes them on the
// context's processor, until shutdown is requested.
//
// Queues are only touched while |m_mu| is held; messages are executed after the lock is
// released, so a long-running search does not block the other threads.
void Engine::MainLoop(Context & context)
{
while (true)
{
bool hasBroadcast = false;
// Messages to execute in this iteration, taken from |context| under the lock.
queue<Message> messages;
{
unique_lock<mutex> lock(m_mu);
m_cv.wait(lock, [&]() { return m_shutdown || !m_messages.empty() || !context.m_messages.empty(); });
// On shutdown the loop exits immediately; still-queued messages are not executed.
if (m_shutdown)
break;
// As SearchEngine is thread-safe, there is a global order on
// public API requests, and this order is kept by the global
// |m_messages| queue. When a broadcast message arrives, it
// must be executed in any case by all threads, therefore the
// first free thread extracts as many as possible broadcast
// messages from |m_messages| front and replicates them to all
// thread-specific |m_messages| queues.
while (!m_messages.empty() && m_messages.front().m_type == Message::TYPE_BROADCAST)
{
for (auto & b : m_contexts)
b.m_messages.push(m_messages.front());
m_messages.pop();
hasBroadcast = true;
}
// Consumes first non-broadcast message, if any. We process
// only a single task message (in contrast with broadcast
// messages) because task messages are actually search queries,
// whose processing may take an arbitrary amount of time. So
// it's better to process only one message and leave rest to the
// next free search thread.
if (!m_messages.empty())
{
context.m_messages.push(std::move(m_messages.front()));
m_messages.pop();
}
messages.swap(context.m_messages);
}
// Broadcasts were put into the queues of the other threads too, so wake them up.
if (hasBroadcast)
m_cv.notify_all();
while (!messages.empty())
{
messages.front()(*context.m_processor);
messages.pop();
}
}
}
template <typename... Args>
void Engine::PostMessage(Args &&... args)
{
lock_guard<mutex> lock(m_mu);
m_messages.emplace(std::forward<Args>(args)...);
m_cv.notify_one();
}
// Runs a single search query described by |params| on |processor|.
// |handle| is attached to |processor| for the duration of the search and is
// detached when the function exits; the elapsed time is logged on exit too.
void Engine::DoSearch(SearchParams params, shared_ptr<ProcessorHandle> handle, Processor & processor)
{
  LOG(LINFO, ("Search started:", params.m_mode, params.m_viewport));

  base::Timer stopwatch;
  SCOPE_GUARD(logDuration,
              [&stopwatch]() { LOG(LINFO, ("Search ended in", stopwatch.ElapsedMilliseconds(), "ms.")); });

  processor.Reset();

  handle->Attach(processor);
  SCOPE_GUARD(detachHandle, [&handle] { handle->Detach(); });

  processor.Search(std::move(params));
}
} // namespace search

181
libs/search/engine.hpp Normal file
View file

@ -0,0 +1,181 @@
#pragma once
#include "search/search_params.hpp"
#include "search/suggest.hpp"
#include "indexer/categories_holder.hpp"
#include "base/macros.hpp"
#include "base/thread.hpp"
#include <condition_variable>
#include <functional>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <vector>
class DataSource;
namespace storage
{
class CountryInfoGetter;
}
namespace search
{
class EngineData;
class Processor;
// This class is used as a reference to a search processor in the
// SearchEngine's queue. It's only possible to cancel a search
// request via this reference.
//
// NOTE: this class is thread-safe.
class ProcessorHandle
{
public:
ProcessorHandle();
// Cancels processor this handle points to.
void Cancel();
private:
// Engine is the only class allowed to attach/detach the handle.
friend class Engine;
// Attaches the handle to a |processor|. If there was or will be a
// cancel signal, this signal will be propagated to |processor|.
// This method is called only once, when search engine starts
// the processor this handle corresponds to.
void Attach(Processor & processor);
// Detaches handle from a processor. This method is called only
// once, when search engine completes processing of the query
// that this handle corresponds to.
void Detach();
// Non-owning pointer to the attached processor.
// NOTE(review): presumably null while no processor is attached - confirm in engine.cpp.
Processor * m_processor;
// NOTE(review): presumably remembers that Cancel() was requested, so that the
// signal can be propagated on Attach() - confirm in engine.cpp.
bool m_cancelled;
// NOTE(review): presumably guards |m_processor| and |m_cancelled| - confirm in engine.cpp.
std::mutex m_mu;
DISALLOW_COPY_AND_MOVE(ProcessorHandle);
};
// This class is a wrapper around thread which processes search
// queries one by one.
//
// NOTE: this class is thread safe.
class Engine
{
public:
// Construction parameters of the engine.
struct Params
{
Params();
Params(std::string const & locale, size_t numThreads);
// Locale passed to the engine on construction.
std::string m_locale;
// This field controls number of threads SearchEngine will create
// to process queries. Use this field wisely as large values may
// negatively affect performance due to false sharing.
size_t m_numThreads;
};
// Doesn't take ownership of dataSource and categories.
Engine(DataSource & dataSource, CategoriesHolder const & categories, storage::CountryInfoGetter const & infoGetter,
Params const & params);
~Engine();
// Posts search request to the queue and returns its handle.
std::weak_ptr<ProcessorHandle> Search(SearchParams params);
// Sets default locale on all query processors.
void SetLocale(std::string const & locale);
// Returns the number of request-processing threads.
size_t GetNumThreads() const;
// Posts request to clear caches to the queue.
void ClearCaches();
// Posts requests to load and cache localities from World.mwm.
void CacheWorldLocalities();
// Posts request to reload cities boundaries tables.
void LoadCitiesBoundaries();
// Posts request to load countries tree.
void LoadCountriesTree();
void EnableIndexingOfBookmarksDescriptions(bool enable);
void EnableIndexingOfBookmarkGroup(bookmarks::GroupId const & groupId, bool enable);
// Clears all bookmarks data and caches for all processors.
void ResetBookmarks();
// Bookmark notifications. The ones implemented in the visible part of
// engine.cpp post a TYPE_BROADCAST message that forwards the call to the
// same-named method of every processor.
void OnBookmarksCreated(std::vector<std::pair<bookmarks::Id, bookmarks::Doc>> const & marks);
void OnBookmarksUpdated(std::vector<std::pair<bookmarks::Id, bookmarks::Doc>> const & marks);
void OnBookmarksDeleted(std::vector<bookmarks::Id> const & marks);
void OnBookmarksAttachedToGroup(bookmarks::GroupId const & groupId, std::vector<bookmarks::Id> const & marks);
void OnBookmarksDetachedFromGroup(bookmarks::GroupId const & groupId, std::vector<bookmarks::Id> const & marks);
private:
// A unit of work for a search thread: a callable applied to a Processor.
struct Message
{
using Fn = std::function<void(Processor & processor)>;
enum Type
{
// Executed by a single search thread (see MainLoop()).
TYPE_TASK,
// Replicated to and executed by every search thread (see MainLoop()).
TYPE_BROADCAST
};
// Wraps any callable |gn| compatible with |Fn|.
template <typename Gn>
Message(Type type, Gn && gn) : m_type(type)
, m_fn(std::forward<Gn>(gn))
{}
// Executes the stored callable on |processor|.
void operator()(Processor & processor) { m_fn(processor); }
Type m_type;
Fn m_fn;
};
// Per-thread state passed to MainLoop().
struct Context
{
// This field *CAN* be accessed by other threads, so |m_mu| must
// be taken before access this queue. Messages are ordered here
// by a timestamp and all timestamps are less than timestamps in
// the global |m_messages| queue.
std::queue<Message> m_messages;
// This field is thread-specific and *CAN NOT* be accessed by
// other threads.
std::unique_ptr<Processor> m_processor;
};
// *ALL* following methods are executed on the m_threads threads.
// This method executes messages from the common |m_messages| queue in
// a FIFO manner. |context.m_messages| contains per-thread (broadcast)
// messages, but nevertheless |m_mu| must be held to access both the
// common queue and the per-thread queues.
void MainLoop(Context & context);
// Constructs a Message from |args| in the common queue and notifies one
// waiting thread.
template <typename... Args>
void PostMessage(Args &&... args);
// Executes a single search query on |processor|; |handle| is attached to
// |processor| while the query is running.
void DoSearch(SearchParams params, std::shared_ptr<ProcessorHandle> handle, Processor & processor);
// NOTE(review): not used in the visible part of engine.cpp - confirm its purpose.
std::vector<Suggest> m_suggests;
// When set, MainLoop() exits. Checked under |m_mu|.
bool m_shutdown;
// Guards |m_messages| and every Context::m_messages.
std::mutex m_mu;
// Signalled when a message is posted and when broadcasts are replicated.
std::condition_variable m_cv;
// Common queue of messages, in the order they were posted.
std::queue<Message> m_messages;
// Contexts of the search threads; broadcasts are replicated to all of them.
std::vector<Context> m_contexts;
std::vector<threads::SimpleThread> m_threads;
};
} // namespace search

View file

@ -0,0 +1,27 @@
#include "search/feature_loader.hpp"
#include "editor/editable_data_source.hpp"
#include "indexer/feature.hpp"
#include "indexer/feature_decl.hpp"
namespace search
{
// Remembers |dataSource| by reference. The features guard is created lazily
// by Load().
FeatureLoader::FeatureLoader(DataSource const & dataSource)
  : m_dataSource(dataSource)
{
}
// Loads the feature identified by |id|. The guard is kept between calls and
// is re-created only when |id| belongs to an MWM other than the guard's one.
// Must be called on the thread the loader was created on.
std::unique_ptr<FeatureType> FeatureLoader::Load(FeatureID const & id)
{
  ASSERT(m_checker.CalledOnOriginalThread(), ());

  bool const needNewGuard = !m_guard || m_guard->GetId() != id.m_mwmId;
  if (needNewGuard)
    m_guard = std::make_unique<FeaturesLoaderGuard>(m_dataSource, id.m_mwmId);

  return m_guard->GetFeatureByIndex(id.m_index);
}
// Drops the current features guard, if any. Must be called on the thread the
// loader was created on.
void FeatureLoader::Reset()
{
  ASSERT(m_checker.CalledOnOriginalThread(), ());

  m_guard = nullptr;
}
} // namespace search

View file

@ -0,0 +1,39 @@
#pragma once
#include "indexer/data_source.hpp"
#include "indexer/scales.hpp"
#include "base/assert.hpp"
#include "base/macros.hpp"
#include "base/thread_checker.hpp"
#include <memory>
#include <utility>
class FeatureType;
struct FeatureID;
namespace search
{
// Loads features by id from a DataSource, keeping the features guard of the
// most recently used MWM between calls. All methods assert that they are
// called on the thread the loader was created on.
class FeatureLoader
{
public:
explicit FeatureLoader(DataSource const & dataSource);
// Loads the feature |id|; the guard is re-created when |id| belongs to
// another MWM than the previous call.
std::unique_ptr<FeatureType> Load(FeatureID const & id);
// Releases the currently held features guard.
void Reset();
// Calls |fn| for features in |rect| at the upper scale
// (scales::GetUpperScale()).
void ForEachInRect(m2::RectD const & rect, std::function<void(FeatureType &)> const & fn)
{
ASSERT(m_checker.CalledOnOriginalThread(), ());
m_dataSource.ForEachInRect(fn, rect, scales::GetUpperScale());
}
private:
DataSource const & m_dataSource;
// Guard of the MWM used by the last Load() call; null before the first
// Load() and after Reset().
std::unique_ptr<FeaturesLoaderGuard> m_guard;
ThreadChecker m_checker;
};
} // namespace search

View file

@ -0,0 +1,336 @@
#pragma once
#include "search/query_params.hpp"
#include "search/search_index_values.hpp"
#include "search/search_trie.hpp"
#include "search/token_slice.hpp"
#include "indexer/trie.hpp"
#include "base/assert.hpp"
#include "base/dfa_helpers.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
#include "base/uni_string_dfa.hpp"
#include <cstdint>
#include <limits>
#include <memory>
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
namespace search
{
namespace impl
{
// Looks for an edge of |trieRoot| whose label starts with |lang|.
// On success stores the index of the first such edge to |langIx| and returns
// true. Otherwise returns false and leaves |langIx| untouched.
template <typename ValueList>
bool FindLangIndex(trie::Iterator<ValueList> const & trieRoot, uint8_t lang, uint32_t & langIx)
{
  auto const & edges = trieRoot.m_edges;
  ASSERT_LESS(edges.size(), std::numeric_limits<uint32_t>::max(), ());

  uint32_t const edgesCount = static_cast<uint32_t>(edges.size());
  uint32_t ix = 0;
  while (ix < edgesCount)
  {
    auto const & label = edges[ix].m_label;
    ASSERT_GREATER_OR_EQUAL(label.size(), 1, ());

    if (label[0] == lang)
    {
      langIx = ix;
      return true;
    }
    ++ix;
  }

  return false;
}
// Traverses the trie rooted at |trieRoot| breadth-first, advancing |dfa|
// along the edge labels. The DFA is first fed with the
// [rootPrefix, rootPrefix + rootPrefixSize) symbols.
//
// For every trie node reached in an accepting DFA state, |toDo| is called
// for each value of the node as toDo(value, exactMatch), where exactMatch is
// true iff the DFA made no errors on the way.
//
// Returns true iff at least one accepting state was reached (even if the
// corresponding node had no values).
template <typename ValueList, typename DFA, typename ToDo>
bool MatchInTrie(trie::Iterator<ValueList> const & trieRoot, strings::UniChar const * rootPrefix, size_t rootPrefixSize,
                 DFA const & dfa, ToDo && toDo)
{
  using TrieDFAIt = std::shared_ptr<trie::Iterator<ValueList>>;
  using DFAIt = typename DFA::Iterator;
  using State = std::pair<TrieDFAIt, DFAIt>;

  std::queue<State> q;

  {
    auto it = dfa.Begin();
    strings::DFAMove(it, rootPrefix, rootPrefix + rootPrefixSize);
    if (it.Rejects())
      return false;
    q.emplace(trieRoot.Clone(), it);
  }

  bool found = false;

  while (!q.empty())
  {
    // The front element is popped right away, so move it out instead of
    // copying the shared pointer and the DFA iterator.
    State const p = std::move(q.front());
    q.pop();

    auto const & trieIt = p.first;
    auto const & dfaIt = p.second;

    if (dfaIt.Accepts())
    {
      trieIt->m_values.ForEach([&dfaIt, &toDo](auto const & v) { toDo(v, dfaIt.ErrorsMade() == 0); });
      found = true;
    }

    size_t const numEdges = trieIt->m_edges.size();
    for (size_t i = 0; i < numEdges; ++i)
    {
      auto const & edge = trieIt->m_edges[i];

      // Each outgoing edge continues from its own copy of the DFA state.
      auto curIt = dfaIt;
      strings::DFAMove(curIt, edge.m_label.begin(), edge.m_label.end());
      if (!curIt.Rejects())
        q.emplace(trieIt->GoToEdge(i), curIt);
    }
  }

  return found;
}
// Accumulates values step by step. Within a step, values accepted by the
// filter are collected together with an "exact match" flag (OR-ed over all
// occurrences of the value in this step). Starting from the second step, a
// value is collected only if it was collected during the previous step.
// ForEachResult() reports the values of the last finished step.
template <typename Filter, typename Value>
class OffsetIntersector
{
  using Values = std::unordered_map<Value, bool>;

  Filter const & m_filter;
  // Values of the last finished step; null until the first NextStep() call.
  std::unique_ptr<Values> m_prevValues;
  // Values of the step in progress.
  std::unique_ptr<Values> m_values;

public:
  explicit OffsetIntersector(Filter const & filter) : m_filter(filter), m_values(std::make_unique<Values>()) {}

  void operator()(Value const & v, bool exactMatch)
  {
    bool const seenOnPrevStep = !m_prevValues || m_prevValues->find(v) != m_prevValues->end();
    if (!seenOnPrevStep || !m_filter(v))
      return;

    auto const inserted = m_values->emplace(v, exactMatch);
    // The value is already known: upgrade it to an exact match if needed.
    if (!inserted.second && exactMatch)
      inserted.first->second = true;
  }

  // Finishes the current step: its values become "previous" ones and a new
  // empty step is started.
  void NextStep()
  {
    if (m_prevValues)
      m_prevValues->clear();
    else
      m_prevValues = std::make_unique<Values>();

    m_values.swap(m_prevValues);
  }

  // Calls toDo(value, exactMatch) for every value of the last finished step.
  // Does nothing if NextStep() was never called.
  template <class ToDo>
  void ForEachResult(ToDo && toDo) const
  {
    if (m_prevValues == nullptr)
      return;

    for (auto it = m_prevValues->begin(); it != m_prevValues->end(); ++it)
      toDo(it->first, it->second);
  }
};
} // namespace impl
// A trie root together with the part of the incoming edge label that follows
// its first symbol. The prefix points into the |edge| passed to the
// constructor, so |edge| (and |root|) must outlive this object.
template <typename ValueList>
struct TrieRootPrefix
{
  using Value = typename ValueList::Value;
  using Iterator = trie::Iterator<ValueList>;

  Iterator const & m_root;
  // Null when the label consists of a single symbol.
  strings::UniChar const * m_prefix;
  size_t m_prefixSize;

  TrieRootPrefix(Iterator const & root, typename Iterator::Edge::EdgeLabel const & edge)
    : m_root(root), m_prefix(nullptr), m_prefixSize(0)
  {
    if (edge.size() != 1)
    {
      // Skip the first symbol of the label.
      m_prefix = &edge[1];
      m_prefixSize = edge.size() - 1;
    }
  }
};
// Collects (value, exactMatch) pairs accepted by the filter, in the order
// they were passed. Duplicates are kept.
template <typename Filter, typename Value>
class TrieValuesHolder
{
public:
  TrieValuesHolder(Filter const & filter) : m_filter(filter) {}

  void operator()(Value const & v, bool exactMatch)
  {
    if (!m_filter(v))
      return;
    m_values.push_back(std::pair<Value, bool>(v, exactMatch));
  }

  // Calls toDo(value, exactMatch) for every collected pair.
  template <class ToDo>
  void ForEachValue(ToDo && toDo) const
  {
    for (size_t i = 0; i < m_values.size(); ++i)
      toDo(m_values[i].first, m_values[i].second);
  }

private:
  std::vector<std::pair<Value, bool>> m_values;
  Filter const & m_filter;
};
// A request to a search trie: DFAs for names and categories plus a set of
// allowed languages. The type is movable; declaring the move operations
// suppresses the implicit copy operations.
template <typename DFA>
struct SearchTrieRequest
{
  SearchTrieRequest() = default;

  SearchTrieRequest(SearchTrieRequest &&) = default;
  SearchTrieRequest & operator=(SearchTrieRequest &&) = default;

  // Replaces the set of languages with those elements of |langs| that fit
  // into [0, max(int8_t)].
  template <typename Langs>
  void SetLangs(Langs const & langs)
  {
    m_langs.clear();
    for (auto const lang : langs)
    {
      bool const fits = lang >= 0 && lang <= std::numeric_limits<int8_t>::max();
      if (!fits)
        continue;
      m_langs.insert(static_cast<int8_t>(lang));
    }
  }

  bool HasLang(int8_t lang) const { return m_langs.count(lang) != 0; }

  // Drops all DFAs and languages.
  void Clear()
  {
    m_langs.clear();
    m_categories.clear();
    m_names.clear();
  }

  std::vector<DFA> m_names;
  std::vector<strings::UniStringDFA> m_categories;

  // Set of languages, will be prepended to all DFAs in |m_names|
  // during retrieval from a search index. Semantics of this field
  // depends on the search index, for example this can be a set of
  // langs from StringUtf8Multilang, or a set of locale indices.
  std::unordered_set<int8_t> m_langs;
};
// Runs every DFA from |dfas| over the trie described by |trieRoot| and calls
// |toDo| for each value accepted by at least one DFA.
//
// *NOTE* |toDo| may be called several times for the same feature.
template <typename DFA, typename ValueList, typename ToDo>
void MatchInTrie(std::vector<DFA> const & dfas, TrieRootPrefix<ValueList> const & trieRoot, ToDo && toDo)
{
  size_t const count = dfas.size();
  for (size_t i = 0; i < count; ++i)
    impl::MatchInTrie(trieRoot.m_root, trieRoot.m_prefix, trieRoot.m_prefixSize, dfas[i], toDo);
}
// Calls |toDo| for each feature in the categories branch of |trieRoot| that
// matches the category DFAs of |request|. Returns false iff the trie has no
// categories branch.
//
// *NOTE* |toDo| may be called several times for the same feature.
template <typename DFA, typename ValueList, typename ToDo>
bool MatchCategoriesInTrie(SearchTrieRequest<DFA> const & request, trie::Iterator<ValueList> const & trieRoot,
                           ToDo && toDo)
{
  uint32_t categoriesIx = 0;
  bool const hasCategories = impl::FindLangIndex(trieRoot, search::kCategoriesLang, categoriesIx);

  if (hasCategories)
  {
    auto const & edge = trieRoot.m_edges[categoriesIx].m_label;
    ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());

    auto const categoriesRoot = trieRoot.GoToEdge(categoriesIx);
    MatchInTrie(request.m_categories, TrieRootPrefix<ValueList>(*categoriesRoot, edge), toDo);
  }

  return hasCategories;
}
// For each top-level edge of |trieRoot| whose language code is below
// kCategoriesLang and is allowed by |request|, calls |toDo| with the trie
// root prefix of that language and the language code.
template <typename DFA, typename ValueList, typename ToDo>
void ForEachLangPrefix(SearchTrieRequest<DFA> const & request, trie::Iterator<ValueList> const & trieRoot, ToDo && toDo)
{
  ASSERT_LESS(trieRoot.m_edges.size(), std::numeric_limits<uint32_t>::max(), ());

  uint32_t const edgesCount = static_cast<uint32_t>(trieRoot.m_edges.size());
  for (uint32_t ix = 0; ix != edgesCount; ++ix)
  {
    auto const & edge = trieRoot.m_edges[ix].m_label;
    ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());

    // The first symbol of a top-level label is the language code.
    int8_t const lang = static_cast<int8_t>(edge[0]);
    if (!(edge[0] < search::kCategoriesLang))
      continue;
    if (!request.HasLang(lang))
      continue;

    auto const langRoot = trieRoot.GoToEdge(ix);
    TrieRootPrefix<ValueList> langPrefix(*langRoot, edge);
    toDo(langPrefix, lang);
  }
}
// Calls |toDo| for each feature whose description matches |request| and is
// accepted by |filter|. Matches by name (in all allowed languages) and by
// category are aggregated, so each feature is passed to |toDo| only once.
template <typename DFA, typename ValueList, typename Filter, typename ToDo>
void MatchFeaturesInTrie(SearchTrieRequest<DFA> const & request, trie::Iterator<ValueList> const & trieRoot,
                         Filter const & filter, ToDo && toDo)
{
  using Value = typename ValueList::Value;

  // Categories are collected first, but are fed to the aggregator after names.
  TrieValuesHolder<Filter, Value> categories(filter);
  bool const hasCategories = MatchCategoriesInTrie(request, trieRoot, categories);

  /// @todo Not sure why do we have OffsetIntersector here? We are doing aggregation only.
  impl::OffsetIntersector<Filter, Value> aggregator(filter);

  // Aggregate for all languages.
  auto const matchNames = [&request, &aggregator](TrieRootPrefix<ValueList> & langRoot, int8_t /* lang */)
  {
    MatchInTrie(request.m_names, langRoot, aggregator);
  };
  ForEachLangPrefix(request, trieRoot, matchNames);

  // Aggregate categories.
  if (hasCategories)
    categories.ForEachValue(aggregator);

  // Finish the single aggregation step to make its values available.
  aggregator.NextStep();
  aggregator.ForEachResult(toDo);
}
// Calls |toDo| for each feature from the postcodes branch of |trieRoot| that
// is accepted by |filter| and matches every token of |slice|. Does nothing
// if the trie has no postcodes branch.
template <typename ValueList, typename Filter, typename ToDo>
void MatchPostcodesInTrie(TokenSlice const & slice, trie::Iterator<ValueList> const & trieRoot, Filter const & filter,
                          ToDo && toDo)
{
  using namespace strings;
  using Value = typename ValueList::Value;

  uint32_t postcodesIx = 0;
  if (!impl::FindLangIndex(trieRoot, search::kPostcodesLang, postcodesIx))
    return;

  auto const & edge = trieRoot.m_edges[postcodesIx].m_label;
  auto const postcodesRoot = trieRoot.GoToEdge(postcodesIx);

  impl::OffsetIntersector<Filter, Value> intersector(filter);

  size_t const tokensCount = slice.Size();
  for (size_t tokenIx = 0; tokenIx != tokensCount; ++tokenIx)
  {
    // Full match required even for prefix token. Reasons:
    // 1. For postcode every symbol is important, partial matching can lead to wrong results.
    // 2. For prefix match query like "streetname 40" where |streetname| is located in 40xxx
    // postcode zone will give all street vicinity as the result which is wrong.
    std::vector<UniStringDFA> tokenDfas;
    auto const addDfa = [&tokenDfas](UniString const & s) { tokenDfas.emplace_back(s); };
    slice.Get(tokenIx).ForOriginalAndSynonyms(addDfa);

    MatchInTrie(tokenDfas, TrieRootPrefix<ValueList>(*postcodesRoot, edge), intersector);

    // One intersection step per token.
    intersector.NextStep();
  }

  intersector.ForEachResult(toDo);
}
} // namespace search

View file

@ -0,0 +1,36 @@
#include "search/features_filter.hpp"
#include "search/cbv.hpp"
namespace search
{
// FeaturesFilter ----------------------------------------------------------------------------------
// Keeps a reference to |filter| (not a copy) and remembers |threshold|.
FeaturesFilter::FeaturesFilter(CBV const & filter, uint64_t threshold)
  : m_filter(filter)
  , m_threshold(threshold)
{
}
// Returns true when |cbv| is full or contains more than |m_threshold| features.
bool FeaturesFilter::NeedToFilter(CBV const & cbv) const
{
  return cbv.IsFull() || cbv.PopCount() > m_threshold;
}
// LocalityFilter ----------------------------------------------------------------------------------
// A locality filter is a features filter with zero threshold.
LocalityFilter::LocalityFilter(CBV const & filter)
  : FeaturesFilter(filter, 0 /* threshold */)
{
}
// Leaves only those features of |cbv| that belong to the filter set.
CBV LocalityFilter::Filter(CBV const & cbv) const
{
  CBV const & localityFeatures = m_filter;
  return localityFeatures.Intersect(cbv);
}
// ViewportFilter ----------------------------------------------------------------------------------
// Forwards both arguments to the base features filter.
ViewportFilter::ViewportFilter(CBV const & filter, uint64_t threshold)
  : FeaturesFilter(filter, threshold)
{
}
// Returns the features of |cbv| that belong to the filter set. If there are
// none, falls back to cbv.Take(m_threshold).
CBV ViewportFilter::Filter(CBV const & cbv) const
{
  auto inViewport = m_filter.Intersect(cbv);
  if (inViewport.IsEmpty())
    return cbv.Take(m_threshold);
  return inViewport;
}
} // namespace search

View file

@ -0,0 +1,52 @@
#pragma once
#include <cstdint>
namespace search
{
class CBV;
// A lightweight filter of features. Holds a reference to a set of features
// (|m_filter|) and a threshold; subclasses define how the set is applied.
//
// NOTE: this class and its subclasses *ARE* thread-safe.
class FeaturesFilter
{
public:
// |filter| is stored by reference, so it must outlive this object.
FeaturesFilter(CBV const & filter, uint64_t threshold);
virtual ~FeaturesFilter() = default;
// Returns true when |features| is full or contains more than |m_threshold|
// features.
bool NeedToFilter(CBV const & features) const;
// Returns the filtered version of |cbv|.
virtual CBV Filter(CBV const & cbv) const = 0;
protected:
CBV const & m_filter;
uint64_t const m_threshold;
};
// Exact filter - leaves only features belonging to the set it was
// constructed from. Its threshold is always zero.
class LocalityFilter : public FeaturesFilter
{
public:
LocalityFilter(CBV const & filter);
// FeaturesFilter overrides:
// Returns the intersection of the filter set and |cbv|.
CBV Filter(CBV const & cbv) const override;
};
// Fuzzy filter - tries to leave only features belonging to the set it
// was constructed from, but if the result is empty, leaves at most
// first |threshold| features instead. This property is quite useful
// when there are no matching features in viewport but it's ok to
// process a limited number of features outside the viewport.
class ViewportFilter : public FeaturesFilter
{
public:
ViewportFilter(CBV const & filter, uint64_t threshold);
// FeaturesFilter overrides:
// Returns the intersection of the filter set and |cbv| when it is not
// empty, otherwise cbv.Take(threshold).
CBV Filter(CBV const & cbv) const override;
};
} // namespace search

View file

@ -0,0 +1,35 @@
#include "search/features_layer.hpp"
#include "base/internal/message.hpp"
#include <iostream>
#include <sstream>
using namespace std;
namespace search
{
// Delegates initialization of the fields to Clear().
FeaturesLayer::FeaturesLayer()
{
  this->Clear();
}
void FeaturesLayer::Clear()
{
m_sortedFeatures = nullptr;
m_subQuery.clear();
m_tokenRange.Clear();
m_type = Model::TYPE_COUNT;
m_hasDelayedFeatures = false;
m_lastTokenIsPrefix = false;
}
// Returns a human-readable description of |layer|: the number of sorted
// features (0 when they are not set), sub-query, token range, type and the
// prefix flag.
string DebugPrint(FeaturesLayer const & layer)
{
  size_t const numFeatures = layer.m_sortedFeatures ? layer.m_sortedFeatures->size() : 0;

  ostringstream out;
  out << "FeaturesLayer [size of m_sortedFeatures: " << numFeatures;
  out << ", subquery: " << DebugPrint(layer.m_subQuery);
  out << ", tokenRange: " << DebugPrint(layer.m_tokenRange);
  out << ", type: " << DebugPrint(layer.m_type);
  out << ", lastTokenIsPrefix: " << layer.m_lastTokenIsPrefix;
  out << "]";
  return out.str();
}
} // namespace search

View file

@ -0,0 +1,43 @@
#pragma once
#include "search/cbv.hpp"
#include "search/model.hpp"
#include "search/token_range.hpp"
#include "base/string_utils.hpp"
#include <cstdint>
#include <string>
#include <vector>
namespace search
{
// This structure represents a part of search query interpretation -
// a substring of query tokens (|m_tokenRange|, joined in |m_subQuery|)
// matched with a set of features (|m_sortedFeatures|) of the same |m_type|.
struct FeaturesLayer
{
// Initializes the fields by calling Clear().
FeaturesLayer();
// Resets the fields to their initial values.
void Clear();
// Non-owning ptr to a sorted vector of features.
std::vector<uint32_t> const * m_sortedFeatures = nullptr;
// Fetch vector of Features, described by this layer (used for CITY, SUBURB).
std::function<CBV()> m_getFeatures;
// Part of the query text this layer corresponds to.
strings::UniString m_subQuery;
// Range of query tokens this layer corresponds to.
TokenRange m_tokenRange;
// Type of the features of this layer; Model::TYPE_COUNT after Clear().
Model::Type m_type;
// Meaningful only when m_type equals to BUILDING.
// When true, m_sortedFeatures contains only features retrieved from
// search index by m_subQuery, and it's necessary for Geocoder to
// perform additional work to retrieve features matching by house number.
bool m_hasDelayedFeatures;
// NOTE(review): presumably true when the last token of |m_tokenRange| is a
// prefix token - the flag is set outside of this file, confirm in Geocoder.
bool m_lastTokenIsPrefix;
};
std::string DebugPrint(FeaturesLayer const & layer);
} // namespace search

View file

@ -0,0 +1,152 @@
#include "search/features_layer_matcher.hpp"
#include "search/house_to_street_table.hpp"
#include "search/reverse_geocoder.hpp"
#include "editor/osm_editor.hpp"
#include "indexer/scales.hpp"
#include "base/assert.hpp"
#include <string>
namespace search
{
using namespace std;
/// Max distance (in meters) from a house to a street at which the nearest
/// street is still accepted as the house's street, even if there is no exact
/// street written for this house.
int constexpr kMaxApproxStreetDistanceM = 100;
// Creates a matcher without MWM context and postcodes: they are set later by
// SetContext() and SetPostcodes(). |cancellable| is stored and checked by
// BailIfCancelled(). The string arguments are the names given to the caches.
FeaturesLayerMatcher::FeaturesLayerMatcher(DataSource const & dataSource, base::Cancellable const & cancellable)
: m_context(nullptr)
, m_postcodes(nullptr)
, m_reverseGeocoder(dataSource)
, m_nearbyStreetsCache("FeatureToNearbyStreets")
, m_matchingStreetsCache("BuildingToStreet")
, m_place2address("PlaceToAddresses")
, m_loader(scales::GetUpperScale(), ReverseGeocoder::kLookupRadiusM)
, m_cancellable(cancellable)
{}
// Switches the matcher (and its street vicinity loader) to |context|.
// |context| must not be null. Setting the current context again is a no-op.
void FeaturesLayerMatcher::SetContext(MwmContext * context)
{
  ASSERT(context, ());

  if (m_context != context)
  {
    m_context = context;
    m_loader.SetContext(context);
  }
}
// Remembers a non-owning pointer to the postcodes set; null means that
// postcodes are not checked.
void FeaturesLayerMatcher::SetPostcodes(CBV const * postcodes)
{
  this->m_postcodes = postcodes;
}
// Notifies the caches and the street vicinity loader that the current query
// is finished.
void FeaturesLayerMatcher::OnQueryFinished()
{
  // Caches of the matcher itself.
  m_nearbyStreetsCache.ClearIfNeeded();
  m_matchingStreetsCache.ClearIfNeeded();
  m_place2address.ClearIfNeeded();

  // The loader handles the end of the query on its own.
  m_loader.OnQueryFinished();
}
// Returns the cached features attached to the place |placeId| via the
// house-to-place table. On a cache miss the list is built from the features
// returned by |fn|; the table itself is loaded on first use.
std::vector<uint32_t> const & FeaturesLayerMatcher::GetPlaceAddrFeatures(uint32_t placeId,
                                                                         std::function<CBV()> const & fn)
{
  ASSERT(fn, ());

  auto const cached = m_place2address.Get(placeId);
  auto & addrFeatures = cached.first;
  bool const needToFill = cached.second;
  if (!needToFill)
    return addrFeatures;

  auto & value = m_context->m_value;
  if (!value.m_house2place)
    value.m_house2place = LoadHouseToPlaceTable(value);

  auto const collect = [&](uint32_t fid)
  {
    auto const r = value.m_house2place->Get(fid);
    if (r && r->m_streetId == placeId)
      addrFeatures.push_back(fid);
  };
  fn().ForEach(collect);

  ASSERT(base::IsSortedAndUnique(addrFeatures), ());
  return addrFeatures;
}
// Returns the index of the street matching the house |houseId|. The house
// feature is loaded only if the implementation asks for it.
uint32_t FeaturesLayerMatcher::GetMatchingStreet(FeatureID const & houseId)
{
  // Keeps the loaded feature alive until the implementation returns.
  std::unique_ptr<FeatureType> house;

  auto loadHouse = [this, &house, &houseId]()
  {
    house = GetByIndex(houseId.m_index);
    return house.get();
  };

  return GetMatchingStreetImpl(houseId, loadHouse);
}
// Returns the index of the street matching the already loaded house |feature|.
uint32_t FeaturesLayerMatcher::GetMatchingStreet(FeatureType & feature)
{
  auto getLoaded = [&feature]() { return &feature; };
  return GetMatchingStreetImpl(feature.GetID(), getLoaded);
}
// Returns the streets near |feature|. The result is computed by the reverse
// geocoder on a cache miss and served from the cache afterwards.
FeaturesLayerMatcher::Streets const & FeaturesLayerMatcher::GetNearbyStreets(FeatureType & feature)
{
  auto cached = m_nearbyStreetsCache.Get(feature.GetID().m_index);
  auto & streets = cached.first;

  bool const needToFill = cached.second;
  if (needToFill)
    streets = m_reverseGeocoder.GetNearbyStreets(feature);

  return streets;
}
// Returns the index of the street the house |id| belongs to, or kInvalidId.
// |getter| is called to obtain the house feature only when the answer can not
// be taken from the cache; it may return null.
//
// The street is chosen as follows:
// - for a house whose street was edited: the nearby street with the edited name;
// - for other houses: the original street recorded for the house;
// - if none of the above is found: the nearest street, provided that it is
//   closer than kMaxApproxStreetDistanceM.
template <class FeatureGetterT>
uint32_t FeaturesLayerMatcher::GetMatchingStreetImpl(FeatureID const & id, FeatureGetterT && getter)
{
  // Check if this feature is modified - the logic will be different.
  std::string editedStreetName;
  bool const edited = osm::Editor::Instance().GetEditedFeatureStreet(id, editedStreetName);

  // The cached value is trusted for non-edited features only.
  auto entry = m_matchingStreetsCache.Get(id.m_index);
  bool const cacheHit = !entry.second;
  if (cacheHit && !edited)
    return entry.first;

  uint32_t & street = entry.first;
  street = kInvalidId;

  FeatureType * house = getter();
  if (house == nullptr)
    return street;

  if (!edited)
  {
    FeatureID originalStreet;
    if (m_reverseGeocoder.GetOriginalStreetByHouse(*house, originalStreet))
    {
      street = originalStreet.m_index;
      return street;
    }
  }

  // Get nearby streets and calculate the resulting index.
  auto const & nearby = GetNearbyStreets(*house);

  if (edited)
  {
    for (auto it = nearby.begin(); it != nearby.end(); ++it)
    {
      if (it->m_name == editedStreetName)
      {
        street = it->m_id.m_index;
        return street;
      }
    }
  }

  // If there is no saved street for feature, assume that it's a nearest street if it's too close.
  bool const nearestIsCloseEnough = !nearby.empty() && nearby[0].m_distanceMeters < kMaxApproxStreetDistanceM;
  if (nearestIsCloseEnough)
    street = nearby[0].m_id.m_index;

  return street;
}
} // namespace search

View file

@ -0,0 +1,473 @@
#pragma once
#include "search/cancel_exception.hpp"
#include "search/cbv.hpp"
#include "search/features_layer.hpp"
#include "search/house_numbers_matcher.hpp"
#include "search/model.hpp"
#include "search/mwm_context.hpp"
#include "search/point_rect_matcher.hpp"
#include "search/projection_on_street.hpp"
#include "search/reverse_geocoder.hpp"
#include "search/stats_cache.hpp"
#include "search/street_vicinity_loader.hpp"
#include "indexer/feature.hpp"
#include "indexer/feature_algo.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "indexer/mwm_set.hpp"
#include "geometry/mercator.hpp"
#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
#include "base/cancellable.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include <algorithm>
#include <limits>
#include <memory>
#include <vector>
class DataSource;
namespace search
{
// This class performs pairwise intersection between two layers of
// features, where the first (child) layer is geographically smaller
// than the second (parent) one. It emits all pairs
// (feature-from-child-layer, feature-from-parent-layer) of matching
// features, where feature-from-child-layer belongs-to
// feature-from-parent-layer. Belongs-to is a partial relation on
// features, and has different meaning for different search classes:
//
// * BUILDING/POI belongs-to STREET iff it is located on the street;
// * BUILDING belongs-to CITY iff the building is located in the city;
// * POI belongs-to BUILDING iff the poi is (roughly) located near or inside the building;
// * SUBPOI belongs-to COMPLEX_POI iff the SUBPOI is (roughly) located near or inside the COMPLEX_POI;
// * STREET belongs-to CITY iff the street is (roughly) located in the city;
// * etc.
//
// NOTE: this class *IS NOT* thread-safe.
class FeaturesLayerMatcher
{
public:
static uint32_t constexpr kInvalidId = std::numeric_limits<uint32_t>::max();
static int constexpr kBuildingRadiusMeters = 50;
static int constexpr kComplexPoiRadiusMeters = 300;
static int constexpr kStreetRadiusMeters = 100;
FeaturesLayerMatcher(DataSource const & dataSource, base::Cancellable const & cancellable);
void SetContext(MwmContext * context);
void SetPostcodes(CBV const * postcodes);
// Calls |fn| for matching pairs of features from the |child| and |parent|
// layers. Does nothing unless the child type is strictly less than the
// parent type. The matching routine is selected by the parent type.
template <typename Fn>
void Match(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
{
  bool const properOrder = child.m_type < parent.m_type;
  if (!properOrder)
    return;

  switch (parent.m_type)
  {
  case Model::TYPE_COMPLEX_POI:
    ASSERT_EQUAL(child.m_type, Model::TYPE_SUBPOI, ());
    MatchPOIsWithParent(child, parent, fn);
    break;

  case Model::TYPE_BUILDING:
    ASSERT(Model::IsPoi(child.m_type), ());
    MatchPOIsWithParent(child, parent, fn);
    break;

  case Model::TYPE_STREET:
    ASSERT(Model::IsPoiOrBuilding(child.m_type), ("Invalid child layer type:", child.m_type));
    if (!Model::IsPoi(child.m_type))
      MatchBuildingsWithStreets(child, parent, fn);
    else
      MatchPOIsWithStreets(child, parent, fn);
    break;

  case Model::TYPE_SUBURB:
    ASSERT(child.m_type == Model::TYPE_STREET || child.m_type == Model::TYPE_BUILDING || Model::IsPoi(child.m_type),
           ());
    // Avoid matching buildings to suburb without street.
    if (child.m_type != Model::TYPE_BUILDING)
      MatchChildWithSuburbs(child, parent, fn);
    else
      MatchBuildingsWithPlace(child, parent, fn);
    break;

  case Model::TYPE_CITY:
    ASSERT_EQUAL(child.m_type, Model::TYPE_BUILDING, ());
    MatchBuildingsWithPlace(child, parent, fn);
    break;

  // These types can not be a parent layer.
  case Model::TYPE_SUBPOI:
  case Model::TYPE_VILLAGE:
  case Model::TYPE_STATE:
  case Model::TYPE_COUNTRY:
  case Model::TYPE_UNCLASSIFIED:
  case Model::TYPE_COUNT:
    ASSERT(false, ("Invalid parent layer type:", parent.m_type));
    break;
  }
}
void OnQueryFinished();
private:
std::vector<uint32_t> const & GetPlaceAddrFeatures(uint32_t placeId, std::function<CBV()> const & fn);
// Forwards to the free function ::search::BailIfCancelled() with the
// cancellable this matcher was constructed with.
void BailIfCancelled()
{
  ::search::BailIfCancelled(m_cancellable);
}
// Checks whether |feature| matches the parsed house number query.
// |queryParse| must not be empty.
// - Address interpolation features are matched as a range.
// - Features without a house number never match.
// - Features from "Czech" and "Slovakia" are matched with the conscription
//   variant of the matcher.
static bool HouseNumbersMatch(FeatureType & feature, std::vector<house_numbers::Token> const & queryParse)
{
  ASSERT(!queryParse.empty(), ());

  auto const interpol = ftypes::IsAddressInterpolChecker::Instance().GetInterpolType(feature);
  bool const isInterpolation = interpol != feature::InterpolType::None;
  if (isInterpolation)
    return house_numbers::HouseNumbersMatchRange(feature.GetRef(), queryParse, interpol);

  auto const uniHouse = strings::MakeUniString(feature.GetHouseNumber());
  if (uniHouse.empty())
    return false;

  bool const conscription = feature.GetID().IsEqualCountry({"Czech", "Slovakia"});
  return conscription ? house_numbers::HouseNumbersMatchConscription(uniHouse, queryParse)
                      : house_numbers::HouseNumbersMatch(uniHouse, queryParse);
}
// Matches POIs from the |child| layer with features of the |parent| layer
// (buildings or complex POIs) and calls fn(poiFeatureId, parentFeatureId)
// for every matching pair.
//
// A POI matches a parent when the POI center lies inside the parent's rect:
// the limit rect for non-point parents, or a rect of |parentRadius| meters
// around the center for point parents. If the parent layer has delayed
// features, buildings around each POI are additionally read from the MWM and
// matched by house number.
template <typename Fn>
void MatchPOIsWithParent(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
{
  double parentRadius = 0.0;
  // Following code initially loads centers of POIs and then, for
  // each building, tries to find all POIs located at distance less
  // than parentRadius.
  if (parent.m_type == Model::TYPE_BUILDING)
  {
    ASSERT(Model::IsPoi(child.m_type), ());
    parentRadius = kBuildingRadiusMeters;
  }
  else
  {
    ASSERT_EQUAL(parent.m_type, Model::TYPE_COMPLEX_POI, ());
    ASSERT_EQUAL(child.m_type, Model::TYPE_SUBPOI, ());
    parentRadius = kComplexPoiRadiusMeters;
  }

  auto const & pois = *child.m_sortedFeatures;
  auto const & buildings = *parent.m_sortedFeatures;

  BailIfCancelled();

  std::vector<PointRectMatcher::PointIdPair> poiCenters;
  poiCenters.reserve(pois.size());
  // POIs that fail to load are skipped, so |poiCenters| may be shorter than
  // |pois|. poiIndices[k] is the index in |pois| of the POI whose center is
  // poiCenters[k].
  std::vector<size_t> poiIndices;
  poiIndices.reserve(pois.size());
  for (size_t i = 0; i < pois.size(); ++i)
  {
    if (auto poiFt = GetByIndex(pois[i]))
    {
      poiCenters.emplace_back(feature::GetCenter(*poiFt, FeatureType::WORST_GEOMETRY), i /* id */);
      poiIndices.push_back(i);
    }
  }

  std::vector<PointRectMatcher::RectIdPair> buildingRects;
  buildingRects.reserve(buildings.size());
  auto maxRadius = parentRadius;
  for (size_t i = 0; i < buildings.size(); ++i)
  {
    BailIfCancelled();

    auto buildingFt = GetByIndex(buildings[i]);
    if (!buildingFt)
      continue;

    if (buildingFt->GetGeomType() == feature::GeomType::Point)
    {
      auto const center = feature::GetCenter(*buildingFt, FeatureType::WORST_GEOMETRY);
      buildingRects.emplace_back(mercator::RectByCenterXYAndSizeInMeters(center, parentRadius), i /* id */);
    }
    else
    {
      buildingRects.emplace_back(buildingFt->GetLimitRect(FeatureType::WORST_GEOMETRY), i /* id */);
      double const rectSize = std::max(buildingRects.back().m_rect.SizeX(), buildingRects.back().m_rect.SizeY());
      maxRadius = std::max(maxRadius, rectSize / 2);
    }
  }

  // Ids passed to the matcher are indices in |pois| and |buildings|.
  PointRectMatcher::Match(poiCenters, buildingRects, PointRectMatcher::RequestType::Any,
                          [&](size_t poiId, size_t buildingId)
  {
    ASSERT_LESS(poiId, pois.size(), ());
    ASSERT_LESS(buildingId, buildings.size(), ());
    fn(pois[poiId], buildings[buildingId]);
  });

  if (!parent.m_hasDelayedFeatures)
    return;

  // |buildings| doesn't contain buildings matching by house number,
  // so following code reads buildings in POIs vicinities and checks
  // house numbers.
  std::vector<house_numbers::Token> queryParse;
  ParseQuery(parent.m_subQuery, parent.m_lastTokenIsPrefix, queryParse);
  if (queryParse.empty())
    return;

  // Iterate over the loaded POIs only: indexing |poiCenters| with an index of
  // |pois| pairs a POI with a wrong center (or reads out of bounds) as soon
  // as one POI has failed to load.
  for (size_t k = 0; k < poiCenters.size(); ++k)
  {
    BailIfCancelled();

    auto const & poiCenter = poiCenters[k].m_point;
    auto const poiFeatureId = pois[poiIndices[k]];

    m_context->ForEachFeature(mercator::RectByCenterXYAndSizeInMeters(poiCenter, maxRadius),
                              [&](FeatureType & ft)
    {
      BailIfCancelled();

      if (m_postcodes && !m_postcodes->HasBit(ft.GetID().m_index) && !m_postcodes->HasBit(GetMatchingStreet(ft)))
        return;

      if (HouseNumbersMatch(ft, queryParse))
      {
        double const distanceM = mercator::DistanceOnEarth(feature::GetCenter(ft), poiCenter);
        if (distanceM < maxRadius)
          fn(poiFeatureId, ft.GetID().m_index);
      }
    });
  }
}
// Matches POIs from the |child| layer with streets from the |parent| layer
// and calls fn(poiFeatureId, streetFeatureId) for every POI whose projection
// on the street exists and is closer than kStreetRadiusMeters.
//
// Candidate pairs are found with PointRectMatcher: every street segment is
// represented by its inflated bounding rect.
template <typename Fn>
void MatchPOIsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
{
  BailIfCancelled();

  ASSERT(Model::IsPoi(child.m_type), ());
  ASSERT_EQUAL(parent.m_type, Model::TYPE_STREET, ());

  auto const & pois = *child.m_sortedFeatures;
  auto const & streets = *parent.m_sortedFeatures;

  // Features that fail to load are skipped, so the vectors below may be
  // shorter than |pois| and |streets|. Ids passed to the matcher are therefore
  // indices in these compacted vectors; |loadedPois| and |loadedStreets| map
  // them back to feature ids.
  std::vector<PointRectMatcher::PointIdPair> poiCenters;
  poiCenters.reserve(pois.size());
  std::vector<uint32_t> loadedPois;
  loadedPois.reserve(pois.size());
  for (uint32_t const poi : pois)
  {
    if (auto poiFt = GetByIndex(poi))
    {
      poiCenters.emplace_back(feature::GetCenter(*poiFt, FeatureType::WORST_GEOMETRY), loadedPois.size() /* id */);
      loadedPois.push_back(poi);
    }
  }

  std::vector<PointRectMatcher::RectIdPair> streetRects;
  streetRects.reserve(streets.size());
  std::vector<ProjectionOnStreetCalculator> streetProjectors;
  streetProjectors.reserve(streets.size());
  std::vector<uint32_t> loadedStreets;
  loadedStreets.reserve(streets.size());
  for (uint32_t const street : streets)
  {
    auto streetFt = GetByIndex(street);
    if (!streetFt)
      continue;

    // Index that the projector of this street gets in |streetProjectors|.
    size_t const streetIdx = streetProjectors.size();

    streetFt->ParseGeometry(FeatureType::WORST_GEOMETRY);

    m2::RectD inflationRect;
    // Any point is good enough here, and feature::GetCenter would re-read the geometry.
    if (streetFt->GetPointsCount() > 0)
      inflationRect = mercator::RectByCenterXYAndSizeInMeters(streetFt->GetPoint(0), 0.5 * kStreetRadiusMeters);

    for (size_t j = 0; j + 1 < streetFt->GetPointsCount(); ++j)
    {
      auto const & p1 = streetFt->GetPoint(j);
      auto const & p2 = streetFt->GetPoint(j + 1);
      m2::RectD rect(p1, p2);
      rect.Inflate(inflationRect.SizeX(), inflationRect.SizeY());
      streetRects.emplace_back(rect, streetIdx /* id */);
    }

    std::vector<m2::PointD> streetPoints;
    streetPoints.reserve(streetFt->GetPointsCount());
    for (size_t j = 0; j < streetFt->GetPointsCount(); ++j)
      streetPoints.emplace_back(streetFt->GetPoint(j));
    streetProjectors.emplace_back(streetPoints);
    loadedStreets.push_back(street);
  }

  BailIfCancelled();

  PointRectMatcher::Match(poiCenters, streetRects, PointRectMatcher::RequestType::All,
                          [&](size_t poiIdx, size_t streetIdx)
  {
    ASSERT_LESS(poiIdx, poiCenters.size(), ());
    ASSERT_LESS(streetIdx, streetProjectors.size(), ());

    auto const & poiCenter = poiCenters[poiIdx].m_point;
    ProjectionOnStreet proj;
    if (streetProjectors[streetIdx].GetProjection(poiCenter, proj) && proj.m_distMeters < kStreetRadiusMeters)
      fn(loadedPois[poiIdx], loadedStreets[streetIdx]);
  });
}
// Calls |fn|(houseId, streetId) for buildings that belong to one of the |parent| streets.
// A building qualifies when it is listed in |child| or, if |child| has delayed features,
// when its house number matches the parsed |child.m_subQuery|.
template <typename Fn>
void MatchBuildingsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
{
  ASSERT_EQUAL(child.m_type, Model::TYPE_BUILDING, ());
  ASSERT_EQUAL(parent.m_type, Model::TYPE_STREET, ());

  auto const & buildings = *child.m_sortedFeatures;
  auto const & streets = *parent.m_sortedFeatures;
  bool const hasDelayed = child.m_hasDelayedFeatures;

  // Fast path: every candidate building is already known and there are fewer buildings than
  // streets, so resolving the street of each building is probably cheaper than loading
  // the vicinity of each street.
  if (!hasDelayed && buildings.size() < streets.size())
  {
    for (uint32_t const houseId : buildings)
    {
      BailIfCancelled();

      uint32_t const streetId = GetMatchingStreet({m_context->GetId(), houseId});
      if (std::binary_search(streets.begin(), streets.end(), streetId))
        fn(houseId, streetId);
    }
    return;
  }

  std::vector<house_numbers::Token> queryParse;
  ParseQuery(child.m_subQuery, child.m_lastTokenIsPrefix, queryParse);

  // House-number matching is possible only for delayed features and a non-empty parse.
  bool const canMatchByNumber = hasDelayed && !queryParse.empty();

  // No memoization of the predicate: each house is visited once, via its mapped street.
  uint32_t invocations = 0;
  auto const acceptsHouse = [&](uint32_t houseId, uint32_t streetId) -> bool
  {
    // Poll cancellation once per 256 invocations.
    if ((++invocations & 0xFF) == 0)
      BailIfCancelled();

    if (std::binary_search(buildings.begin(), buildings.end(), houseId))
      return true;

    if (!canMatchByNumber)
      return false;

    if (m_postcodes && !(m_postcodes->HasBit(houseId) || m_postcodes->HasBit(streetId)))
      return false;

    std::unique_ptr<FeatureType> feature = GetByIndex(houseId);
    return feature ? HouseNumbersMatch(*feature, queryParse) : false;
  };

  for (uint32_t const streetId : streets)
  {
    BailIfCancelled();

    StreetVicinityLoader::Street const & street = m_loader.GetStreet(streetId);
    if (street.IsEmpty())
      continue;

    for (uint32_t const houseId : street.m_features)
    {
      if (acceptsHouse(houseId, streetId))
        fn(houseId, streetId);
    }
  }
}
// Calls |fn|(houseId, placeId) for buildings that belong to the place given by the first
// feature of |parent|. Known buildings of |child| are intersected with the place's address
// features; when |child| has delayed features, the address features are additionally
// filtered by house number.
template <typename Fn>
void MatchBuildingsWithPlace(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
{
  ASSERT_EQUAL(child.m_type, Model::TYPE_BUILDING, ());

  auto const & buildings = *child.m_sortedFeatures;
  uint32_t const placeId = parent.m_sortedFeatures->front();
  auto const & ids = GetPlaceAddrFeatures(placeId, parent.m_getFeatures);

  // Iterating an empty range is a no-op, so no explicit emptiness check is needed.
  for (uint32_t const houseId : buildings)
  {
    if (std::binary_search(ids.begin(), ids.end(), houseId))
      fn(houseId, placeId);
  }

  if (!child.m_hasDelayedFeatures)
    return;

  std::vector<house_numbers::Token> queryParse;
  ParseQuery(child.m_subQuery, child.m_lastTokenIsPrefix, queryParse);
  if (queryParse.empty())
    return;

  uint32_t visited = 0;
  for (uint32_t const houseId : ids)
  {
    // Poll cancellation once per 256 visited features.
    if ((++visited & 0xFF) == 0)
      BailIfCancelled();

    if (m_postcodes && !m_postcodes->HasBit(houseId))
      continue;

    /// @todo Add house -> number cache for this and MatchBuildingsWithStreets?
    std::unique_ptr<FeatureType> feature = GetByIndex(houseId);
    if (feature && HouseNumbersMatch(*feature, queryParse))
      fn(houseId, placeId);
  }
}
// Stub matcher: pairs every feature of |child| with the first feature of |parent| (the suburb).
// No geometric check is done here; according to the original note the children are filtered
// later in Geocoder.
/// @todo Can intersect with parent.m_getFeatures here.
template <typename Fn>
void MatchChildWithSuburbs(FeaturesLayer const & child, FeaturesLayer const & parent, Fn && fn)
{
  uint32_t const suburbId = parent.m_sortedFeatures->front();
  auto const & children = *child.m_sortedFeatures;
  for (uint32_t const childId : children)
    fn(childId, suburbId);
}
// Returns the id of the street feature corresponding to |houseId|/|houseFeature|, or
// kInvalidId if there is no such street.
uint32_t GetMatchingStreet(FeatureID const & houseId);
uint32_t GetMatchingStreet(FeatureType & houseFeature);
// Shared implementation of the overloads above; |getter| supplies the house feature.
// NOTE(review): the exact contract of |getter| is defined out of line - confirm there.
template <class FeatureGetterT>
uint32_t GetMatchingStreetImpl(FeatureID const & id, FeatureGetterT && getter);
using Street = ReverseGeocoder::Street;
using Streets = std::vector<Street>;
// Returns the streets in the vicinity of |feature|.
// Presumably served from |m_nearbyStreetsCache| - confirm against the definition.
Streets const & GetNearbyStreets(FeatureType & feature);
// Loads the feature with index |id| from the current mwm context.
// Returns null (after logging a warning) when the feature is unavailable.
std::unique_ptr<FeatureType> GetByIndex(uint32_t id) const
{
  /// @todo Add Cache for feature id -> (point, name / house number).
  auto feature = m_context->GetFeature(id);
  if (feature)
    return feature;

  // It may happen to features deleted by the editor: they are not returned by
  // EditableDataSource, but their ids are still present in the search index.
  LOG(LWARNING, ("GetFeature() returned false.", id));
  return feature;
}
// Context of the mwm whose features are matched; feature ids are resolved through it
// (see GetByIndex()).
MwmContext * m_context;
// Optional postcode filter. When non-null, house-number candidates are accepted only if
// the bit of the house (or of its street, where a street is known) is set.
CBV const * m_postcodes;
ReverseGeocoder m_reverseGeocoder;
// Cache of streets in a feature's vicinity. All lists in the cache
// are ordered by distance from the corresponding feature.
Cache<uint32_t, Streets> m_nearbyStreetsCache;
// Cache of correct streets for buildings. Current search algorithm
// supports only one street for a building, whereas buildings can be
// located on multiple streets.
Cache<uint32_t, uint32_t> m_matchingStreetsCache;
// Cache of addresses that belong to a place (city/village).
Cache<uint32_t, std::vector<uint32_t>> m_place2address;
// Provides, per street id, the features located in the street's vicinity
// (see MatchBuildingsWithStreets()).
StreetVicinityLoader m_loader;
// Cancellation source; presumably the one polled by BailIfCancelled() - confirm.
base::Cancellable const & m_cancellable;
};
} // namespace search

View file

@ -0,0 +1,214 @@
#include "search/features_layer_path_finder.hpp"
#include "search/features_layer_matcher.hpp"
#include "search/house_numbers_matcher.hpp"
#include "indexer/features_vector.hpp"
#include "base/stl_helpers.hpp"
#include <deque>
#include <unordered_map>
namespace search
{
using namespace std;
// static
// Pass-selection mode shared by all path finders; MODE_AUTO picks the cheaper pass
// (see FindReachableVertices()).
FeaturesLayerPathFinder::Mode FeaturesLayerPathFinder::m_mode = MODE_AUTO;
namespace
{
// One map per pair of adjacent layers, each mapping a child feature id to its parent
// feature id. GetPath() walks the maps from the back of the deque to the front,
// starting with a feature of the lowest layer.
using ParentGraph = deque<unordered_map<uint32_t, uint32_t>>;
// Estimates the amount of work needed to perform an intersection pass over the
// sequence of layers [begin, end), visited in iteration order.
// Every step costs (size of the next layer) * (number of features still reachable),
// where an empty layer counts as one feature.
template <typename It>
uint64_t CalcPassCost(It begin, It end)
{
  if (begin == end)
    return 0;

  // Size of the layer under |it|, clamped from below by one.
  auto const layerSize = [](It const & it) -> uint64_t
  { return std::max((*it)->m_sortedFeatures->size(), size_t(1)); };

  uint64_t total = 0;
  uint64_t frontier = layerSize(begin);

  It it = begin;
  for (++it; it != end; ++it)
  {
    uint64_t const current = layerSize(it);
    total += current * frontier;
    // The number of reachable features can't exceed the size of the smaller layer.
    if (current < frontier)
      frontier = current;
  }
  return total;
}
// Cost estimate of a pass that starts from the last layer and moves towards the first one.
uint64_t CalcTopDownPassCost(vector<FeaturesLayer const *> const & layers)
{
  auto const first = layers.crbegin();
  auto const last = layers.crend();
  return CalcPassCost(first, last);
}
// Cost estimate of a pass that starts from the first layer and moves towards the last one.
uint64_t CalcBottomUpPassCost(vector<FeaturesLayer const *> const & layers)
{
  auto const first = layers.cbegin();
  auto const last = layers.cend();
  return CalcPassCost(first, last);
}
// Reconstructs into |result| the chain of features that starts at feature |id| of the
// first layer and follows the child -> parent edges of |parent| up to the last layer.
// Returns false (leaving |result| partially filled) when the sizes of |layers| and |parent|
// are inconsistent or when some feature on the way has no recorded parent.
bool GetPath(uint32_t id, vector<FeaturesLayer const *> const & layers, ParentGraph const & parent,
             IntersectionResult & result)
{
  result.Clear();

  size_t const numEdgeLevels = parent.size();
  if (layers.size() != numEdgeLevels + 1)
    return false;

  for (size_t level = 0; level < numEdgeLevels; ++level)
  {
    result.Set(layers[level]->m_type, id);

    // Edges of the lowest pair of layers are stored at the back of the deque.
    auto const & edges = parent[numEdgeLevels - 1 - level];
    auto const edge = edges.find(id);
    if (edge == edges.cend())
      return false;
    id = edge->second;
  }

  result.Set(layers[numEdgeLevels]->m_type, id);
  return true;
}
// Returns true when |layer| is a BUILDING layer whose sub-query looks like a house number.
bool MayHaveDelayedFeatures(FeaturesLayer const & layer)
{
  if (layer.m_type != Model::TYPE_BUILDING)
    return false;
  return house_numbers::LooksLikeHouseNumber(layer.m_subQuery, layer.m_lastTokenIsPrefix);
}
} // namespace
// Dispatches to the top-down or the bottom-up pass according to |m_mode|.
// In MODE_AUTO the bottom-up pass is used only when its estimated cost is strictly lower.
template <class FnT>
void FeaturesLayerPathFinder::FindReachableVertices(FeaturesLayerMatcher & matcher,
                                                    vector<FeaturesLayer const *> const & layers, FnT && fn)
{
  switch (m_mode)
  {
  case MODE_AUTO:
    if (CalcBottomUpPassCost(layers) < CalcTopDownPassCost(layers))
      FindReachableVerticesBottomUp(matcher, layers, fn);
    else
      FindReachableVerticesTopDown(matcher, layers, fn);
    return;
  case MODE_BOTTOM_UP:
    FindReachableVerticesBottomUp(matcher, layers, fn);
    return;
  case MODE_TOP_DOWN:
    FindReachableVerticesTopDown(matcher, layers, fn);
    return;
  }
}
// High level -> low level pass: starts with all features of the last layer and, layer by
// layer, keeps only the children matched with a still-reachable parent. For every feature
// reached on the first layer the full path is restored and passed to |fn|.
template <class FnT>
void FeaturesLayerPathFinder::FindReachableVerticesTopDown(FeaturesLayerMatcher & matcher,
                                                           vector<FeaturesLayer const *> const & layers, FnT && fn)
{
  ASSERT(!layers.empty(), ());

  size_t const topIdx = layers.size() - 1;

  vector<uint32_t> reachable = *(layers.back()->m_sortedFeatures);
  vector<uint32_t> buffer;
  ParentGraph parentGraph;

  // Records the first parent found for a child; later parents of the same child are ignored.
  auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature)
  {
    if (!parentGraph.back().emplace(childFeature, parentFeature).second)
      return;
    buffer.push_back(childFeature);
  };

  for (size_t parentIdx = topIdx; parentIdx != 0; --parentIdx)
  {
    BailIfCancelled();

    parentGraph.emplace_back();

    bool const isTopLayer = (parentIdx == topIdx);

    FeaturesLayer parent(*layers[parentIdx]);
    // |reachable| of the top layer is the layer's own sorted list; the following ones are
    // filled by |addEdge| in arbitrary order and must be normalized.
    if (!isTopLayer)
      base::SortUnique(reachable);
    parent.m_sortedFeatures = &reachable;

    // The first condition is an optimization: it is enough to extract
    // the delayed features only once.
    parent.m_hasDelayedFeatures = (isTopLayer && MayHaveDelayedFeatures(parent));

    FeaturesLayer child(*layers[parentIdx - 1]);
    child.m_hasDelayedFeatures = MayHaveDelayedFeatures(child);

    buffer.clear();
    matcher.Match(child, parent, addEdge);
    reachable.swap(buffer);
  }

  // After the last step |reachable| holds the features reached on the lowest layer.
  IntersectionResult result;
  for (auto const & id : reachable)
  {
    if (GetPath(id, layers, parentGraph, result))
      fn(result);
  }
}
// Low level -> high level pass: starts with all features of the first layer and, layer by
// layer, collects the parents matched with a still-reachable child. Paths are then restored
// for the candidate features of the lowest layer and passed to |fn|.
template <class FnT>
void FeaturesLayerPathFinder::FindReachableVerticesBottomUp(FeaturesLayerMatcher & matcher,
                                                            vector<FeaturesLayer const *> const & layers, FnT && fn)
{
  ASSERT(!layers.empty(), ());

  vector<uint32_t> reachable = *(layers.front()->m_sortedFeatures);
  vector<uint32_t> buffer;
  ParentGraph parentGraph;

  // Delayed features of the lowest level are unknown until the matcher has been called,
  // so |addEdge| appends them here during the first step. The list is seeded with the
  // known features to keep it nonempty when there is only one level. The top-down pass
  // doesn't need this because the last level it reaches is exactly the lowest one.
  vector<uint32_t> lowestLevel = reachable;

  // True iff |addEdge| is currently fed with children of the lowest level.
  bool atLowestLevel = true;

  // Records the first parent found for a child; later parents of the same child are ignored.
  auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature)
  {
    if (!parentGraph.front().emplace(childFeature, parentFeature).second)
      return;
    buffer.push_back(parentFeature);
    if (atLowestLevel)
      lowestLevel.push_back(childFeature);
  };

  size_t const numSteps = layers.size() - 1;
  for (size_t childIdx = 0; childIdx != numSteps; ++childIdx)
  {
    BailIfCancelled();

    parentGraph.emplace_front();

    bool const isBottomLayer = (childIdx == 0);

    FeaturesLayer child(*layers[childIdx]);
    // Parents collected on the previous step come in arbitrary order with repetitions.
    if (!isBottomLayer)
      base::SortUnique(reachable);
    child.m_sortedFeatures = &reachable;
    child.m_hasDelayedFeatures = (isBottomLayer && MayHaveDelayedFeatures(child));

    FeaturesLayer parent(*layers[childIdx + 1]);
    parent.m_hasDelayedFeatures = MayHaveDelayedFeatures(parent);

    buffer.clear();
    matcher.Match(child, parent, addEdge);
    reachable.swap(buffer);

    atLowestLevel = false;
  }

  base::SortUnique(lowestLevel);

  IntersectionResult result;
  for (auto const & id : lowestLevel)
  {
    if (GetPath(id, layers, parentGraph, result))
      fn(result);
  }
}
} // namespace search

View file

@ -0,0 +1,108 @@
#pragma once
#include "search/cancel_exception.hpp"
#include "search/features_layer.hpp"
#include "search/intersection_result.hpp"
#ifdef DEBUG
#include "base/logging.hpp"
#include "base/timer.hpp"
#endif // DEBUG
#include <utility>
#include <vector>
class FeaturesVector;
class MwmValue;
namespace base
{
class Cancellable;
}
namespace search
{
class FeaturesLayerMatcher;
// This class is able to find all paths through a layered graph, with
// vertices as features, and edges as pairs of vertices satisfying
// belongs-to relation. For more details on belongs-to relation see
// documentation for FeaturesLayerMatcher.
//
// In short, this class is able to find all features matching to a
// given interpretation of a search query.
//
// NOTE: this class *IS* thread-safe.
class FeaturesLayerPathFinder
{
public:
// An internal mode. The modes should produce similar results
// and differ only in efficiency: a mode that is faster
// for a search query may be slower for another.
// Modes other than MODE_AUTO should be used only by the testing code.
enum Mode
{
MODE_AUTO,
MODE_TOP_DOWN,
MODE_BOTTOM_UP
};
// |cancellable| is stored by reference and is polled by BailIfCancelled().
FeaturesLayerPathFinder(base::Cancellable const & cancellable) : m_cancellable(cancellable) {}
// Calls |fn| with an IntersectionResult for every result reported by
// FindReachableVertices() for |layers|. Does nothing when |layers| is empty.
// In DEBUG builds the layers, the number of results and the elapsed time are logged.
template <typename TFn>
void ForEachReachableVertex(FeaturesLayerMatcher & matcher, std::vector<FeaturesLayer const *> const & layers,
TFn && fn)
{
if (layers.empty())
return;
// TODO (@y): remove following code as soon as
// FindReachableVertices() will work fast for most cases
// (significantly less than 1 second).
#ifdef DEBUG
for (auto const * layer : layers)
LOG(LDEBUG, (DebugPrint(*layer)));
size_t count = 0;
base::Timer timer;
#endif // DEBUG
FindReachableVertices(matcher, layers, [&](IntersectionResult const & r)
{
fn(r);
#ifdef DEBUG
++count;
#endif // DEBUG
});
#ifdef DEBUG
LOG(LDEBUG, ("Found:", count, "elapsed:", timer.ElapsedSeconds(), "seconds"));
#endif // DEBUG
}
// Overrides the pass-selection mode. |m_mode| is static, so the change is visible
// to every instance of the class.
static void SetModeForTesting(Mode mode) { m_mode = mode; }
private:
// Forwards to ::search::BailIfCancelled() with |m_cancellable|.
void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); }
// Runs one of the two passes below; the choice is made by the out-of-line definition.
template <class FnT>
void FindReachableVertices(FeaturesLayerMatcher & matcher, std::vector<FeaturesLayer const *> const & layers,
FnT && fn);
// Tries to find all |reachable| features from the lowest layer in a
// high level -> low level pass.
template <class FnT>
void FindReachableVerticesTopDown(FeaturesLayerMatcher & matcher, std::vector<FeaturesLayer const *> const & layers,
FnT && fn);
// Tries to find all |reachable| features from the lowest layer in a
// low level -> high level pass.
template <class FnT>
void FindReachableVerticesBottomUp(FeaturesLayerMatcher & matcher, std::vector<FeaturesLayer const *> const & layers,
FnT && fn);
base::Cancellable const & m_cancellable;
// Mode shared by all instances; changed only through SetModeForTesting().
static Mode m_mode;
};
} // namespace search

View file

@ -0,0 +1,28 @@
#pragma once
#include <cstddef>
namespace search
{
// Performance/quality sensitive settings. They are recommended, but not mandatory.
// Radius is in meters from one of the predefined pivots:
// - viewport center
// - user's position
// - matched city center
struct RecommendedFilteringParams
{
/// @name When reading and matching features "along" the street.
/// @{
// Streets search radius, in meters. Can be ignored if the number of streets in the area
// is less than m_maxStreetsCount.
double m_streetSearchRadiusM = 80000;
// Max number of street candidates. The number of streets can be greater if they are all
// inside the m_streetSearchRadiusM area.
size_t m_maxStreetsCount = 100;
// Streets cluster radius - average logical streets group in an average city.
// If an Exact match is not found in the cluster, Relaxed cluster streets are emitted.
double m_streetClusterRadiusMercator = 0.05; // ~5km
/// @}
};
} // namespace search

1949
libs/search/geocoder.cpp Normal file

File diff suppressed because it is too large Load diff

374
libs/search/geocoder.hpp Normal file
View file

@ -0,0 +1,374 @@
#pragma once
#include "search/cancel_exception.hpp"
#include "search/categories_cache.hpp"
#include "search/cbv.hpp"
#include "search/cities_boundaries_table.hpp"
#include "search/cuisine_filter.hpp"
#include "search/feature_offset_match.hpp"
#include "search/features_layer.hpp"
#include "search/features_layer_path_finder.hpp"
#include "search/filtering_params.hpp"
#include "search/geocoder_context.hpp"
#include "search/geocoder_locality.hpp"
#include "search/geometry_cache.hpp"
#include "search/mode.hpp"
#include "search/model.hpp"
#include "search/mwm_context.hpp"
#include "search/postcode_points.hpp"
#include "search/query_params.hpp"
#include "search/streets_matcher.hpp"
#include "search/token_range.hpp"
#include "search/tracer.hpp"
#include "indexer/mwm_set.hpp"
#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
#include "base/cancellable.hpp"
#include "base/dfa_helpers.hpp"
#include "base/levenshtein_dfa.hpp"
#include <map>
#include <memory>
#include <optional>
#include <vector>
class CategoriesHolder;
class DataSource;
class MwmValue;
namespace storage
{
class CountryInfoGetter;
} // namespace storage
namespace search
{
class FeaturesFilter;
class FeaturesLayerMatcher;
class PreRanker;
class TokenSlice;
// This class is used to retrieve all features corresponding to a
// search query. Search query is represented as a sequence of tokens
// (including synonyms for these tokens), and Geocoder tries to build
// all possible partitions (or layers) of the search query, where each
// layer is a set of features corresponding to some search class
// (e.g. POI, BUILDING, STREET, etc., see search/model.hpp).
// Then, Geocoder builds a layered graph, with edges between features
// on adjacent layers (e.g. between BUILDING and STREET, STREET and
// CITY, etc.). Usually an edge between two features means that a
// feature from the lowest layer geometrically belongs to a feature
// from the highest layer (BUILDING is located on STREET, STREET is
// located inside CITY, CITY is located inside STATE, etc.). Final
// part is to find all paths through this layered graph and report all
// features from the lowest layer, that are reachable from the
// highest layer.
class Geocoder
{
public:
// Query parameters of a single geocoding run; extends QueryParams.
struct Params : public QueryParams
{
Mode m_mode = Mode::Everywhere;
m2::RectD m_pivot;
// Presumably the user's position, when known - confirm against the callers.
std::optional<m2::PointD> m_position;
Locales m_categoryLocales;
std::vector<uint32_t> m_cuisineTypes;
std::vector<uint32_t> m_preferredTypes;
std::shared_ptr<Tracer> m_tracer;
RecommendedFilteringParams m_filteringParams;
int m_scale = scales::GetUpperScale();
bool m_useDebugInfo = false; // Set to true for debug logs and tests.
};
// Bundle of locality caches. It is passed to the Geocoder constructor by reference and
// kept in |m_localitiesCaches|, i.e. it is owned outside of the Geocoder.
struct LocalitiesCaches
{
LocalitiesCaches(base::Cancellable const & cancellable);
void Clear();
CountriesCache m_countries;
StatesCache m_states;
CitiesTownsOrVillagesCache m_citiesTownsOrVillages;
VillagesCache m_villages;
};
Geocoder(DataSource const & dataSource, storage::CountryInfoGetter const & infoGetter,
CategoriesHolder const & categories, CitiesBoundariesTable const & citiesBoundaries, PreRanker & preRanker,
LocalitiesCaches & localitiesCaches, base::Cancellable const & cancellable);
~Geocoder();
// Sets search query params.
void SetParams(Params const & params);
// Starts geocoding, retrieved features will be appended to
// |results|.
// NOTE(review): neither method takes a |results| argument; per the EmitResult() comment
// below, results are fed to |m_preRanker| - confirm and update the wording.
void GoEverywhere();
void GoInViewport();
// Ends geocoding and informs the following stages
// of the pipeline (PreRanker and further).
// This method must be called from the previous stage
// of the pipeline (the Processor).
// If |cancelled| is true, the reason for calling Finish must
// be the cancellation of processing the search request, otherwise
// the reason must be the normal exit from GoEverywhere or GoInViewport.
//
// *NOTE* The caller assumes that a call to this method will never
// result in search::CancelException even if the shutdown takes
// noticeable time.
void Finish(bool cancelled);
void CacheWorldLocalities();
void ClearCaches();
private:
// Identifies the kind of rect passed to RetrieveGeometryFeatures().
enum class RectId
{
Pivot,
Locality,
Postcode,
Suburb,
Count
};
using MwmInfoPtr = std::shared_ptr<MwmInfo>;
struct ExtendedMwmInfos
{
struct ExtendedMwmInfo
{
// Orders infos by ascending score, i.e. best first.
bool operator<(ExtendedMwmInfo const & rhs) const { return m_score < rhs.m_score; }
MwmInfoPtr m_info;
MwmContext::MwmType m_type;
// Score is a rect distance, with exceptions for World, viewport and user's position.
// Less score is better for search priority.
double m_score;
};
std::vector<ExtendedMwmInfo> m_infos;
// Presumably the number of "best" maps at the beginning of |m_infos|
// (see OrderCountries()) - confirm.
size_t m_firstBatchSize = 0;
};
struct Postcodes
{
void Clear()
{
m_tokenRange.Clear();
m_countryFeatures.Reset();
m_worldFeatures.Reset();
}
// Checks the bit of feature |id| in the World features when |searchWorld| is true,
// and in the country features otherwise.
bool Has(uint32_t id, bool searchWorld = false) const
{
if (searchWorld)
return m_worldFeatures.HasBit(id);
return m_countryFeatures.HasBit(id);
}
bool IsEmpty() const { return m_countryFeatures.IsEmpty() && m_worldFeatures.IsEmpty(); }
TokenRange m_tokenRange;
CBV m_countryFeatures;
CBV m_worldFeatures;
};
// Sets search query params for categorial search.
void SetParamsForCategorialSearch(Params const & params);
void GoImpl(std::vector<MwmInfoPtr> const & infos, bool inViewport);
template <typename Locality>
using TokenToLocalities = std::map<TokenRange, std::vector<Locality>>;
QueryParams::Token const & GetTokens(size_t i) const;
// Creates a cache of posting lists corresponding to features in m_context
// for each token and saves it to m_addressFeatures.
// NOTE(review): no |m_addressFeatures| member is declared in this class; the comment
// looks outdated - confirm where the lists are stored.
void InitBaseContext(BaseContext & ctx);
void InitLayer(Model::Type type, TokenRange const & tokenRange, FeaturesLayer & layer);
void FillLocalityCandidates(BaseContext const & ctx, CBV const & filter, size_t const maxNumLocalities,
std::vector<Locality> & preLocalities);
void FillLocalitiesTable(BaseContext const & ctx);
void FillVillageLocalities(BaseContext const & ctx);
bool CityHasPostcode(BaseContext const & ctx) const;
template <typename Fn>
void ForEachCountry(ExtendedMwmInfos const & infos, Fn && fn);
// Throws CancelException if cancelled.
void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); }
// A fast-path branch for categorial requests.
void MatchCategories(BaseContext & ctx, bool aroundPivot);
// Tries to find all countries and states in a search query and then
// performs matching of cities in found maps.
void MatchRegions(BaseContext & ctx, Region::Type type);
// Tries to find all cities in a search query and then performs
// matching of streets in found cities.
void MatchCities(BaseContext & ctx);
// Tries to do geocoding without localities, ie. find POIs,
// BUILDINGs and STREETs without knowledge about country, state,
// city or village. If during the geocoding too many features are
// retrieved, viewport is used to throw away excess features.
void MatchAroundPivot(BaseContext & ctx);
// Collection of center points used to clusterize streets (see ClusterizeStreets()).
class CentersFilter
{
buffer_vector<m2::PointD, 4> m_centers;
public:
void Add(m2::PointD const & pt) { m_centers.push_back(pt); }
template <class FnT>
void ClusterizeStreets(std::vector<uint32_t> & streets, Geocoder const & geocoder, FnT && fn) const;
};
// Tries to do geocoding in a limited scope, assuming that knowledge
// about high-level features, like cities or countries, is
// incorporated into |filter|.
void LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter, CentersFilter const & centers);
template <typename Fn>
void WithPostcodes(BaseContext & ctx, Fn && fn);
// Tries to match some adjacent tokens in the query as streets and
// then performs geocoding in street vicinities.
void GreedilyMatchStreets(BaseContext & ctx, CentersFilter const & centers);
// Matches suburbs and streets inside suburbs like |GreedilyMatchStreets|.
void GreedilyMatchStreetsWithSuburbs(BaseContext & ctx, CentersFilter const & centers);
void CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx, StreetsMatcher::Prediction const & prediction,
CentersFilter const & centers, bool makeRelaxed);
void ProcessStreets(BaseContext & ctx, CentersFilter const & centers, CBV const & streets);
// Tries to find all paths in a search tree, where each edge is
// marked with some substring of the query tokens. These paths are
// called "layer sequence" and current path is stored in |m_layers|.
void MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken, CBV const & filter);
// Returns true if current path in the search tree (see comment for
// MatchPOIsAndBuildings()) looks sane. This method is used as a fast
// pre-check to cut off unnecessary work.
bool IsLayerSequenceSane(std::vector<FeaturesLayer> const & layers) const;
/// @returns kInvalidFeatureId if no matching is found.
uint32_t MatchWorld2Country(FeatureID const & id) const;
// Finds all paths through layers and emits reachable features from the lowest layer.
void FindPaths(BaseContext & ctx);
void TraceResult(Tracer & tracer, BaseContext const & ctx, MwmSet::MwmId const & mwmId, uint32_t ftId,
Model::Type type, TokenRange const & tokenRange);
// Forms result and feeds it to |m_preRanker|.
void EmitResult(BaseContext & ctx, FeatureID const & id, Model::Type type, TokenRange const & tokenRange,
IntersectionResult const * geoParts, bool allTokensUsed, bool exactMatch);
void EmitResult(BaseContext & ctx, Region const & region, TokenRange const & tokenRange, bool allTokensUsed,
bool exactMatch);
void EmitResult(BaseContext & ctx, City const & city, TokenRange const & tokenRange, bool allTokensUsed);
// Tries to match unclassified objects from lower layers, like
// parks, forests, lakes, rivers, etc. This method finds all
// UNCLASSIFIED objects that match to all currently unused tokens.
void MatchUnclassified(BaseContext & ctx, size_t curToken);
// A wrapper around RetrievePostcodeFeatures.
CBV RetrievePostcodeFeatures(MwmContext const & context, TokenSlice const & slice);
// A caching wrapper around Retrieval::RetrieveGeometryFeatures.
CBV RetrieveGeometryFeatures(MwmContext const & context, m2::RectD const & rect, RectId id);
// This is a faster wrapper around SearchModel::GetSearchType(), as
// it uses pre-loaded lists of streets and villages.
[[nodiscard]] bool GetTypeInGeocoding(BaseContext const & ctx, uint32_t featureId, Model::Type & type);
// Reorders maps in a way that prefix consists of "best" maps to search and suffix consists of all
// other maps ordered by minimum distance from pivot. Returns ExtendedMwmInfos structure which
// consists of vector of mwms with MwmType information and number of "best" maps to search.
// For viewport mode prefix consists of maps intersecting with pivot ordered by distance from
// pivot center.
// For non-viewport search mode prefix consists of maps intersecting with pivot, map with user
// location and maps with cities matched to the query, sorting prefers mwms that contain the
// user's position.
ExtendedMwmInfos OrderCountries(bool inViewport, std::vector<MwmInfoPtr> const & infos);
DataSource const & m_dataSource;
storage::CountryInfoGetter const & m_infoGetter;
CategoriesHolder const & m_categories;
StreetsCache m_streetsCache;
SuburbsCache m_suburbsCache;
LocalitiesCaches & m_localitiesCaches;
HotelsCache m_hotelsCache;
FoodCache m_foodCache;
cuisine_filter::CuisineFilter m_cuisineFilter;
base::Cancellable const & m_cancellable;
// Geocoder params.
Params m_params;
// This field is used to map features to a limited number of search
// classes.
Model m_model;
// Following fields are set up by Search() method and can be
// modified and used only from Search() or its callees.
// NOTE(review): no Search() method is declared in this class; presumably this refers to
// GoEverywhere()/GoInViewport() - confirm and update the wording.
MwmSet::MwmId m_worldId;
// Context of the currently processed mwm.
std::unique_ptr<MwmContext> m_context;
// m_cities stores both big cities that are visible at World.mwm
// and small villages and hamlets that are not.
TokenToLocalities<City> m_cities;
TokenToLocalities<Region> m_regions[Region::TYPE_COUNT];
CitiesBoundariesTable const & m_citiesBoundaries;
// Caches of features in rects. These caches are separated from
// TokenToLocalities because the latter are quite lightweight and not
// all of them are needed.
PivotRectsCache m_pivotRectsCache;
PivotRectsCache m_postcodesRectsCache;
PivotRectsCache m_suburbsRectsCache;
LocalityRectsCache m_localityRectsCache;
PostcodePointsCache m_postcodePointsCache;
// Postcodes features in the mwm that is currently being processed and World.mwm.
Postcodes m_postcodes;
// This filter is used to throw away excess features.
FeaturesFilter const * m_filter;
// Features matcher for layers intersection.
std::map<MwmSet::MwmId, std::unique_ptr<FeaturesLayerMatcher>> m_matchersCache;
// Non-owning pointer; presumably points into |m_matchersCache| - confirm.
FeaturesLayerMatcher * m_matcher;
// Path finder for interpretations.
FeaturesLayerPathFinder m_finder;
// Search query params prepared for retrieval.
std::vector<SearchTrieRequest<strings::LevenshteinDFA>> m_tokenRequests;
SearchTrieRequest<strings::PrefixDFAModifier<strings::LevenshteinDFA>> m_prefixTokenRequest;
ResultTracer m_resultTracer;
PreRanker & m_preRanker;
};
} // namespace search

View file

@ -0,0 +1,111 @@
#include "search/geocoder_context.hpp"
#include "search/token_range.hpp"
#include "base/assert.hpp"
#include "base/stl_helpers.hpp"
namespace search
{
// static
// Maps a Model::Type to the TokenType of the same name. Model::TYPE_COUNT maps to
// TOKEN_TYPE_COUNT; no Model::Type maps to TOKEN_TYPE_POSTCODE.
BaseContext::TokenType BaseContext::FromModelType(Model::Type type)
{
switch (type)
{
case Model::TYPE_SUBPOI: return TOKEN_TYPE_SUBPOI;
case Model::TYPE_COMPLEX_POI: return TOKEN_TYPE_COMPLEX_POI;
case Model::TYPE_BUILDING: return TOKEN_TYPE_BUILDING;
case Model::TYPE_STREET: return TOKEN_TYPE_STREET;
case Model::TYPE_SUBURB: return TOKEN_TYPE_SUBURB;
case Model::TYPE_UNCLASSIFIED: return TOKEN_TYPE_UNCLASSIFIED;
case Model::TYPE_VILLAGE: return TOKEN_TYPE_VILLAGE;
case Model::TYPE_CITY: return TOKEN_TYPE_CITY;
case Model::TYPE_STATE: return TOKEN_TYPE_STATE;
case Model::TYPE_COUNTRY: return TOKEN_TYPE_COUNTRY;
case Model::TYPE_COUNT: return TOKEN_TYPE_COUNT;
}
// Reached only for a value that is not handled by the switch above.
UNREACHABLE();
}
// static
// Maps a Region::Type to the TokenType of the same name.
BaseContext::TokenType BaseContext::FromRegionType(Region::Type type)
{
switch (type)
{
case Region::TYPE_STATE: return TOKEN_TYPE_STATE;
case Region::TYPE_COUNTRY: return TOKEN_TYPE_COUNTRY;
case Region::TYPE_COUNT: return TOKEN_TYPE_COUNT;
}
// Reached only for a value that is not handled by the switch above.
UNREACHABLE();
}
// Returns the number of query tokens tracked by the context.
size_t BaseContext::NumTokens() const
{
// |m_tokens| and |m_features| are expected to have one entry per token.
ASSERT_EQUAL(m_tokens.size(), m_features.size(), ());
return m_tokens.size();
}
// Returns the index of the first unused token at or after |curToken|,
// or m_tokens.size() when all of them are used.
size_t BaseContext::SkipUsedTokens(size_t curToken) const
{
  for (; curToken != m_tokens.size(); ++curToken)
  {
    if (!IsTokenUsed(curToken))
      break;
  }
  return curToken;
}
// Returns true if |token| is marked as used, i.e. its entry in |m_tokens| holds a real
// token type. TOKEN_TYPE_COUNT serves as the "unused" marker.
// |token| must be a valid index into |m_tokens|.
bool BaseContext::IsTokenUsed(size_t token) const
{
ASSERT_LESS(token, m_tokens.size(), ());
return m_tokens[token] != TOKEN_TYPE_COUNT;
}
// Returns true iff every token is marked as used (vacuously true without tokens).
bool BaseContext::AllTokensUsed() const
{
  size_t const numTokens = m_tokens.size();

  // Scan for the first unused token.
  size_t firstUnused = 0;
  while (firstUnused < numTokens && IsTokenUsed(firstUnused))
    ++firstUnused;

  return firstUnused == numTokens;
}
// Returns true if at least one token with index in [range.Begin(), range.End()) is used.
bool BaseContext::HasUsedTokensInRange(TokenRange const & range) const
{
  ASSERT(range.IsValid(), (range));

  // Stop at the first used token, if any.
  size_t i = range.Begin();
  while (i < range.End() && !IsTokenUsed(i))
    ++i;

  return i < range.End();
}
// Counts maximal runs of consecutive unused tokens.
size_t BaseContext::NumUnusedTokenGroups() const
{
  size_t numGroups = 0;

  // A group starts at an unused token whose predecessor is used; the position before
  // the first token is treated as used.
  bool prevUsed = true;
  for (size_t i = 0; i < m_tokens.size(); ++i)
  {
    bool const used = IsTokenUsed(i);
    if (prevUsed && !used)
      ++numGroups;
    prevUsed = used;
  }
  return numGroups;
}
// Returns the name of |type| without the TOKEN_TYPE_ prefix, e.g. "STREET".
std::string ToString(BaseContext::TokenType type)
{
switch (type)
{
case BaseContext::TOKEN_TYPE_SUBPOI: return "SUBPOI";
case BaseContext::TOKEN_TYPE_COMPLEX_POI: return "COMPLEX_POI";
case BaseContext::TOKEN_TYPE_BUILDING: return "BUILDING";
case BaseContext::TOKEN_TYPE_STREET: return "STREET";
case BaseContext::TOKEN_TYPE_SUBURB: return "SUBURB";
case BaseContext::TOKEN_TYPE_UNCLASSIFIED: return "UNCLASSIFIED";
case BaseContext::TOKEN_TYPE_VILLAGE: return "VILLAGE";
case BaseContext::TOKEN_TYPE_CITY: return "CITY";
case BaseContext::TOKEN_TYPE_STATE: return "STATE";
case BaseContext::TOKEN_TYPE_COUNTRY: return "COUNTRY";
case BaseContext::TOKEN_TYPE_POSTCODE: return "POSTCODE";
case BaseContext::TOKEN_TYPE_COUNT: return "COUNT";
}
// Reached only for a value that is not handled by the switch above.
UNREACHABLE();
}
// Debug-printing overload for TokenType; returns the same text as ToString().
std::string DebugPrint(BaseContext::TokenType type)
{
return ToString(type);
}
} // namespace search

View file

@ -0,0 +1,87 @@
#pragma once
#include "search/cbv.hpp"
#include "search/cuisine_filter.hpp"
#include "search/features_layer.hpp"
#include "search/geocoder_locality.hpp"
#include "search/model.hpp"
#include "search/retrieval.hpp"
#include <memory>
#include <string>
#include <vector>
namespace search
{
class FeaturesFilter;
class TokenRange;
// State shared by the geocoding steps for one query: per-token feature sets,
// the layers/regions/city collected so far and the per-token usage marks.
struct BaseContext
{
// Kind of entity a query token was matched to.
enum TokenType
{
TOKEN_TYPE_SUBPOI,
TOKEN_TYPE_COMPLEX_POI,
TOKEN_TYPE_BUILDING,
TOKEN_TYPE_STREET,
TOKEN_TYPE_SUBURB,
TOKEN_TYPE_UNCLASSIFIED,
TOKEN_TYPE_VILLAGE,
TOKEN_TYPE_CITY,
TOKEN_TYPE_STATE,
TOKEN_TYPE_COUNTRY,
TOKEN_TYPE_POSTCODE,
// Last enumerator. IsTokenUsed() treats a token holding this value as unused.
TOKEN_TYPE_COUNT
};
// Conversions from the model/region type enums to TokenType.
static TokenType FromModelType(Model::Type type);
static TokenType FromRegionType(Region::Type type);
// Returns the number of tokens; |m_tokens| and |m_features| must have the same size.
size_t NumTokens() const;
// Advances |curToken| to the nearest unused token, or to the end of
// |m_tokens| if there are no unused tokens.
size_t SkipUsedTokens(size_t curToken) const;
// Returns true if |token| is marked as used.
bool IsTokenUsed(size_t token) const;
// Returns true iff all tokens are used.
bool AllTokensUsed() const;
// Returns true if there exists at least one used token in |range|.
bool HasUsedTokensInRange(TokenRange const & range) const;
// Counts number of groups of consecutive unused tokens.
size_t NumUnusedTokenGroups() const;
// List of bit-vectors of features, where i-th element of the list
// corresponds to the i-th token in the search query.
std::vector<Retrieval::ExtendedFeatures> m_features;
CBV m_villages;
CBV m_streets;
CBV m_suburbs;
// Stack of layers filled during geocoding.
std::vector<FeaturesLayer> m_layers;
// Stack of regions filled during geocoding.
std::vector<Region const *> m_regions;
// Non-owning pointer; null until a city is set.
City const * m_city = nullptr;
// This vector is used to indicate what tokens were already matched
// and can't be re-used during the geocoding process.
std::vector<TokenType> m_tokens;
// The total number of results emitted using this
// context in all branches of the search.
size_t m_numEmitted = 0;
std::unique_ptr<cuisine_filter::CuisineFilter::ScopedFilter> m_cuisineFilter;
};
std::string DebugPrint(BaseContext::TokenType type);
std::string ToString(BaseContext::TokenType type);
} // namespace search

View file

@ -0,0 +1,28 @@
#include "search/geocoder_locality.hpp"
#include <sstream>
namespace search
{
// static
// Maps a Region::Type onto the Model::Type enumerator of the same name.
Model::Type Region::ToModelType(Type type)
{
  switch (type)
  {
  case TYPE_STATE: return Model::TYPE_STATE;
  case TYPE_COUNTRY: return Model::TYPE_COUNTRY;
  case TYPE_COUNT: return Model::TYPE_COUNT;
  }
  // All Region::Type enumerators are covered above.
  UNREACHABLE();
}
// Renders the feature id and token range of |locality| for logs.
// The other Locality fields are not printed.
std::string DebugPrint(Locality const & locality)
{
  std::ostringstream out;
  out << "Locality [ "
      << "m_featureId=" << DebugPrint(locality.m_featureId) << ", "
      << "m_tokenRange=" << DebugPrint(locality.m_tokenRange) << ", "
      << " ]";
  return out.str();
}
} // namespace search

View file

@ -0,0 +1,80 @@
#pragma once
#include "search/doc_vec.hpp"
#include "search/model.hpp"
#include "search/token_range.hpp"
#include "indexer/feature_decl.hpp"
#include "storage/country_info_getter.hpp"
#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
#include <string>
namespace search
{
class IdfMap;
// Common part of Region and City: a feature together with the query
// token range and query vector it was matched against.
struct Locality
{
// Stores copies of all arguments.
Locality(FeatureID const & fID, TokenRange const & tokenRange, QueryVec const & queryVec, bool exactMatch)
: m_featureId(fID)
, m_tokenRange(tokenRange)
, m_queryVec(queryVec)
, m_exactMatch(exactMatch)
{}
// Returns the index part of |m_featureId|.
uint32_t GetFeatureIndex() const { return m_featureId.m_index; }
FeatureID m_featureId;
TokenRange m_tokenRange;
QueryVec m_queryVec;
bool m_exactMatch;
};
// This struct represents a country or a US or Canadian state. It
// is used to filter maps before search.
struct Region : public Locality
{
enum Type
{
TYPE_STATE,
TYPE_COUNTRY,
TYPE_COUNT
};
// Takes over |locality|; |m_center| starts at (0, 0) and |m_ids| starts empty.
Region(Locality && locality, Type type) : Locality(std::move(locality)), m_center(0, 0), m_type(type) {}
// Converts a region type to the Model::Type enumerator of the same name.
static Model::Type ToModelType(Type type);
storage::CountryInfoGetter::RegionIdVec m_ids;
m2::PointD m_center;
Type m_type;
};
// This struct represents a city or a village. It is used to filter features
// during search.
// todo(@m) It works well as is, but consider a new naming scheme
// when counties etc. are added. E.g., Region for countries and
// states and Locality for smaller settlements.
struct City : public Locality
{
// Takes over |locality|; |m_rect| is default-constructed and must be set by the caller.
City(Locality && locality, Model::Type type) : Locality(std::move(locality)), m_type(type) {}
m2::RectD m_rect;
Model::Type m_type;
};
// A suburb feature together with the query token range stored for it.
// Unlike Region and City it does not derive from Locality.
struct Suburb
{
Suburb(FeatureID const & featureId, TokenRange const & tokenRange) : m_featureId(featureId), m_tokenRange(tokenRange)
{}
FeatureID m_featureId;
TokenRange m_tokenRange;
};
std::string DebugPrint(Locality const & locality);
} // namespace search

View file

@ -0,0 +1,68 @@
#include "search/geometry_cache.hpp"
#include "search/geometry_utils.hpp"
#include "search/mwm_context.hpp"
#include "search/retrieval.hpp"
#include "coding/point_coding.hpp"
#include "geometry/mercator.hpp"
namespace search
{
// GeometryCache -----------------------------------------------------------------------------------
// Stores the per-mwm entry limit and a reference to |cancellable|.
// |maxNumEntries| must be positive; this is CHECKed.
GeometryCache::GeometryCache(size_t maxNumEntries, base::Cancellable const & cancellable)
: m_maxNumEntries(maxNumEntries)
, m_cancellable(cancellable)
{
CHECK_GREATER(m_maxNumEntries, 0, ());
}
// Fills |entry| with |rect|, |scale| and the features that Retrieval returns
// for that rect and scale in |context|.
void GeometryCache::InitEntry(MwmContext const & context, m2::RectD const & rect, int scale, Entry & entry)
{
Retrieval retrieval(context, m_cancellable);
entry.m_rect = rect;
entry.m_cbv = retrieval.RetrieveGeometryFeatures(rect, scale);
entry.m_scale = scale;
}
// PivotRectsCache ---------------------------------------------------------------------------------
// |maxRadiusMeters| is the size in meters of the enlarged rect that Get() tries to cache around a request.
PivotRectsCache::PivotRectsCache(size_t maxNumEntries, base::Cancellable const & cancellable, double maxRadiusMeters)
: GeometryCache(maxNumEntries, cancellable)
, m_maxRadiusMeters(maxRadiusMeters)
{}
// Returns the features for |rect| at |scale|. A cached entry is reused when it
// has the same scale and its rect either contains |rect| or equals it within
// kMwmPointAccuracy. Otherwise a new entry is created for a rect enlarged
// around the center of |rect| (or for |rect| itself if the enlarged one does not cover it).
CBV PivotRectsCache::Get(MwmContext const & context, m2::RectD const & rect, int scale)
{
  auto const isSuitable = [&rect, &scale](Entry const & cached)
  {
    if (scale != cached.m_scale)
      return false;
    return cached.m_rect.IsRectInside(rect) || IsEqualMercator(rect, cached.m_rect, kMwmPointAccuracy);
  };

  auto [entry, isNew] = FindOrCreateEntry(context.GetId(), isSuitable);
  if (!isNew)
    return entry.m_cbv;

  m2::RectD const enlarged = mercator::RectByCenterXYAndSizeInMeters(rect.Center(), m_maxRadiusMeters);
  InitEntry(context, enlarged.IsRectInside(rect) ? enlarged : rect, scale, entry);
  return entry.m_cbv;
}
// LocalityRectsCache ------------------------------------------------------------------------------
// Forwards both arguments to GeometryCache; adds no state of its own.
LocalityRectsCache::LocalityRectsCache(size_t maxNumEntries, base::Cancellable const & cancellable)
: GeometryCache(maxNumEntries, cancellable)
{}
// Returns the features for |rect| at |scale|. A cached entry is reused only when
// it has the same scale and its rect equals |rect| within kMwmPointAccuracy.
CBV LocalityRectsCache::Get(MwmContext const & context, m2::RectD const & rect, int scale)
{
  auto const isSameRequest = [&rect, &scale](Entry const & cached)
  {
    if (scale != cached.m_scale)
      return false;
    return IsEqualMercator(rect, cached.m_rect, kMwmPointAccuracy);
  };

  auto [entry, isNew] = FindOrCreateEntry(context.GetId(), isSameRequest);
  if (isNew)
    InitEntry(context, rect, scale, entry);
  return entry.m_cbv;
}
} // namespace search

View file

@ -0,0 +1,101 @@
#pragma once
#include "search/cbv.hpp"
#include "indexer/mwm_set.hpp"
#include "geometry/rect2d.hpp"
#include "base/assert.hpp"
#include <algorithm>
#include <cstdint>
#include <deque>
#include <map>
#include <utility>
namespace base
{
class Cancellable;
}
namespace search
{
class MwmContext;
// This class represents a simple cache of features in rects for all mwms.
//
// *NOTE* This class is not thread-safe.
class GeometryCache
{
public:
virtual ~GeometryCache() = default;
// Returns (hopefully, cached) list of features in a given
// rect. Note that return value may be invalidated on next calls to
// this method.
virtual CBV Get(MwmContext const & context, m2::RectD const & rect, int scale) = 0;
// Drops the cached entries of all mwms.
inline void Clear() { m_entries.clear(); }
protected:
// One cached answer: the features |m_cbv| retrieved for |m_rect| at |m_scale|.
struct Entry
{
m2::RectD m_rect;
CBV m_cbv;
int m_scale = 0;
};
// |maxNumEntries| denotes the maximum number of rectangles that
// will be cached for each mwm individually.
GeometryCache(size_t maxNumEntries, base::Cancellable const & cancellable);
// Looks for an entry of mwm |id| that satisfies |pred|.
// Returns (entry, false) for an existing entry and (entry, true) for a freshly
// created, default-constructed one that the caller is expected to initialize.
// In both cases the returned entry is the front element of the mwm's deque.
template <typename Pred>
std::pair<Entry &, bool> FindOrCreateEntry(MwmSet::MwmId const & id, Pred && pred)
{
// operator[] creates an empty deque for an mwm seen for the first time.
auto & entries = m_entries[id];
auto it = std::find_if(entries.begin(), entries.end(), std::forward<Pred>(pred));
if (it != entries.end())
{
// Hit: swap the found entry with the current front one.
if (it != entries.begin())
iter_swap(entries.begin(), it);
return std::pair<Entry &, bool>(entries.front(), false);
}
// Miss: add a new front entry and drop the back one if the limit is exceeded.
entries.emplace_front();
if (entries.size() == m_maxNumEntries + 1)
entries.pop_back();
ASSERT_LESS_OR_EQUAL(entries.size(), m_maxNumEntries, ());
ASSERT(!entries.empty(), ());
return std::pair<Entry &, bool>(entries.front(), true);
}
// Fills |entry| with the features retrieved for |rect| and |scale| in |context|.
void InitEntry(MwmContext const & context, m2::RectD const & rect, int scale, Entry & entry);
// Cached entries per mwm, at most |m_maxNumEntries| for each of them.
std::map<MwmSet::MwmId, std::deque<Entry>> m_entries;
size_t const m_maxNumEntries;
// Held by reference: the object passed to the constructor must outlive the cache.
base::Cancellable const & m_cancellable;
};
// GeometryCache whose Get() can also answer from a cached rect that contains
// the requested one; |maxRadiusMeters| is the size used for the enlarged rect it caches.
class PivotRectsCache : public GeometryCache
{
public:
PivotRectsCache(size_t maxNumEntries, base::Cancellable const & cancellable, double maxRadiusMeters);
// GeometryCache overrides:
CBV Get(MwmContext const & context, m2::RectD const & rect, int scale) override;
private:
double const m_maxRadiusMeters;
};
// GeometryCache whose Get() reuses an entry only for a rect equal to the
// requested one (within kMwmPointAccuracy) at the same scale.
class LocalityRectsCache : public GeometryCache
{
public:
LocalityRectsCache(size_t maxNumEntries, base::Cancellable const & cancellable);
// GeometryCache overrides:
CBV Get(MwmContext const & context, m2::RectD const & rect, int scale) override;
};
} // namespace search

View file

@ -0,0 +1,18 @@
#include "search/geometry_utils.hpp"
#include "indexer/scales.hpp"
#include "geometry/mercator.hpp"
namespace search
{
// Distance between two mercator points; thin wrapper over mercator::DistanceOnEarth().
double PointDistance(m2::PointD const & a, m2::PointD const & b)
{
return mercator::DistanceOnEarth(a, b);
}
// Compares two rects with m2::IsEqual(), passing the same tolerance |eps| for both of its epsilon arguments.
bool IsEqualMercator(m2::RectD const & r1, m2::RectD const & r2, double eps)
{
return m2::IsEqual(r1, r2, eps, eps);
}
} // namespace search

View file

@ -0,0 +1,14 @@
#pragma once
#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
namespace search
{
// Distance between 2 mercator points in meters.
double PointDistance(m2::PointD const & a, m2::PointD const & b);
// Tests whether two rects given in the mercator projection are
// equal with the absolute precision |eps|.
bool IsEqualMercator(m2::RectD const & r1, m2::RectD const & r2, double eps);
} // namespace search

View file

@ -0,0 +1,68 @@
#include "search/highlighting.hpp"
#include "std/target_os.hpp"
namespace search
{
namespace
{
// Forward iterator over the range [cur, end) followed by one optional extra
// value |val| (used for the query tokens followed by the prefix).
// The exhausted state is "m_cur == m_end and m_val == nullptr".
template <class Iter, class Value>
class CombinedIterator
{
public:
  CombinedIterator(Iter cur, Iter end, Value const * val) : m_cur(cur), m_end(end), m_val(val) {}

  // Yields the current range element, or the extra value once the range is over.
  Value const & operator*() const
  {
    ASSERT(m_val != nullptr || m_cur != m_end, ("dereferencing of an empty iterator"));
    if (m_cur == m_end)
      return *m_val;
    return *m_cur;
  }

  // Steps through the range first; the step after the range end drops the extra value.
  CombinedIterator & operator++()
  {
    if (m_cur == m_end)
      m_val = nullptr;
    else
      ++m_cur;
    return *this;
  }

  bool operator==(CombinedIterator const & other) const
  {
    if (m_val != other.m_val)
      return false;
    return m_cur == other.m_cur;
  }

  bool operator!=(CombinedIterator const & other) const { return !operator==(other); }

private:
  Iter m_cur;
  Iter m_end;
  Value const * m_val;
};
} // namespace
// Adds highlight ranges to |res| for its title and for its address, matching
// them against the query tokens followed by |prefix| (when it is not empty).
void HighlightResult(QueryTokens const & tokens, strings::UniString const & prefix, Result & res)
{
  using TokenIter = QueryTokens::const_iterator;
  using QueryIter = CombinedIterator<TokenIter, strings::UniString>;
  using Range = std::pair<uint16_t, uint16_t>;

  // A non-empty prefix is iterated as one extra element after the complete tokens.
  strings::UniString const * const extraToken = prefix.empty() ? nullptr : &prefix;
  QueryIter const first(tokens.begin(), tokens.end(), extraToken);
  QueryIter const last(tokens.end() /* cur */, tokens.end() /* end */, nullptr);

  // Highlight Title (potentially including branch)
  std::string title = res.GetString();
#if defined(OMIM_OS_IPHONE)
  // On iOS the branch text is appended to the title for highlighting unless the title already contains it.
  std::string const & branch = res.GetBranch();
  bool const appendBranch = !branch.empty() && title.find(branch) == std::string::npos;
  if (appendBranch)
    title += " " + branch;
#endif
  auto const addTitleRange = [&res](Range const & range) { res.AddHighlightRange(range); };
  SearchStringTokensIntersectionRanges(title, first, last, addTitleRange);

  // Highlight description.
  auto const addDescRange = [&res](Range const & range) { res.AddDescHighlightRange(range); };
  SearchStringTokensIntersectionRanges(res.GetAddress(), first, last, addDescRange);
}
} // namespace search

View file

@ -0,0 +1,62 @@
#pragma once
#include "indexer/search_delimiters.hpp"
#include "search/common.hpp"
#include "search/result.hpp"
#include "base/string_utils.hpp"
#include <cstdint>
#include <string>
#include <utility>
namespace search
{
template <typename LowTokensIter, typename F>
void SearchStringTokensIntersectionRanges(std::string const & s, LowTokensIter itLowBeg, LowTokensIter itLowEnd, F && f)
{
// split input query by tokens and prefix
search::Delimiters delimsTest;
size_t pos = 0;
strings::UniString const str = strings::MakeUniString(s);
size_t const strLen = str.size();
while (pos < strLen)
{
// skip delimeters
while (pos < strLen && delimsTest(str[pos]))
++pos;
size_t const beg = pos;
// find token
while (pos < strLen && !delimsTest(str[pos]))
++pos;
strings::UniString subStr;
subStr.assign(str.begin() + beg, str.begin() + pos);
size_t maxCount = 0;
std::pair<uint16_t, uint16_t> result(0, 0);
for (auto itLow = itLowBeg; itLow != itLowEnd; ++itLow)
{
size_t const cnt = strings::CountNormLowerSymbols(subStr, *itLow);
if (cnt > maxCount)
{
maxCount = cnt;
result.first = beg;
result.second = cnt;
}
}
if (result.second != 0)
f(result);
}
}
// Adds to |res| the ranges that match the query tokens and, therefore, should be highlighted.
// The query is passed in |tokens| and |prefix|.
void HighlightResult(QueryTokens const & tokens, strings::UniString const & prefix, Result & res);
} // namespace search

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,250 @@
#pragma once
#include "search/feature_loader.hpp"
#include "search/projection_on_street.hpp"
#include "indexer/feature_decl.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "geometry/point2d.hpp"
#include "base/macros.hpp"
#include <map>
#include <queue>
#include <string>
#include <utility>
#include <vector>
class DataSource;
namespace search
{
// A house number string together with the integer values parsed from it.
// NOTE(review): |m_startN| / |m_endN| look like the bounds of a numeric range
// taken from the string - confirm against the constructor in the .cpp file.
struct ParsedNumber
{
public:
/// @todo Pass correct "American" notation flag.
ParsedNumber(std::string const & number, bool american = false);
std::string const & GetNumber() const { return m_fullN; }
// Note: for a negative |m_startN| the remainder is never 1, so this returns false.
bool IsOdd() const { return (m_startN % 2 == 1); }
int GetIntNumber() const { return m_startN; }
bool IsIntersect(ParsedNumber const & number, int offset = 0) const;
private:
std::string m_fullN;
int m_startN;
int m_endN;
};
// A house: its parsed number and its position.
class House
{
public:
// |number| is parsed by ParsedNumber with the default |american| flag.
House(std::string const & number, m2::PointD const & point) : m_number(number), m_point(point) {}
std::string const & GetNumber() const { return m_number.GetNumber(); }
int GetIntNumber() const { return m_number.GetIntNumber(); }
m2::PointD const & GetPosition() const { return m_point; }
/// @return \n
/// -1 - no match;
/// 0 - full match;
/// 1 - integer number match with odd (even).
/// 2 - integer number match.
int GetMatch(ParsedNumber const & number) const;
bool GetNearbyMatch(ParsedNumber const & number) const;
private:
ParsedNumber m_number;
m2::PointD m_point;
};
// NOTE: DO NOT DELETE instances of this class by a pointer/reference
// to ProjectionOnStreet, because both classes have non-virtual destructors.
// Projection of a house onto a street.
struct HouseProjection : public ProjectionOnStreet
{
// Orders projections by increasing |m_distMeters|.
struct LessDistance
{
bool operator()(HouseProjection const * p1, HouseProjection const * p2) const
{
return p1->m_distMeters < p2->m_distMeters;
}
};
// Predicate: true for projections that point to the given House object (pointer comparison).
class EqualHouse
{
public:
explicit EqualHouse(House const * h) : m_house(h) {}
bool operator()(HouseProjection const * p) const { return m_house == p->m_house; }
private:
House const * m_house;
};
// Note: returns false when the integer house number is negative, since the remainder is then never 1.
bool IsOdd() const { return (m_house->GetIntNumber() % 2 == 1); }
// Not initialized by this struct; must be set before IsOdd() is called.
House const * m_house;
/// Distance in mercator, from street beginning to projection on street
double m_streetDistance;
};
// A street: its points, the houses projected onto it and its names.
// Many features may be combined into one street.
class Street
{
public:
Street() : m_length(0.0), m_number(-1), m_housesRead(false) {}
void Reverse();
void SortHousesProjection();
/// Get limit rect for street with ortho offset to the left and right.
m2::RectD GetLimitRect(double offsetMeters) const;
double GetLength() const;
double GetPrefixLength(size_t numSegs) const;
// Two streets are the same when their processed names are equal.
static bool IsSameStreets(Street const * s1, Street const * s2) { return s1->m_processedName == s2->m_processedName; }
void SetName(std::string_view name);
// Returns the processed name (the one compared by IsSameStreets()).
std::string const & GetDbgName() const { return m_processedName; }
std::string const & GetName() const { return m_name; }
std::vector<m2::PointD> m_points;
std::vector<HouseProjection> m_houses;
double m_length; /// Length in mercator
int m_number; /// Some ordered number after merging
bool m_housesRead;
private:
std::string m_name;
std::string m_processedName;
};
// A sequence of Street objects treated as one street, with iteration over
// the house projections of all its parts.
class MergedStreet
{
public:
// Position of a house projection: |s| indexes |m_cont|, |h| indexes the
// |m_houses| of that street.
struct Index
{
size_t s, h;
Index() : s(0), h(0) {}
};
// Orders merged streets by decreasing |m_length|.
struct GreaterLength
{
bool operator()(MergedStreet const & s1, MergedStreet const & s2) const { return (s1.m_length > s2.m_length); }
};
MergedStreet() : m_length(0.0) {}
std::string const & GetDbgName() const;
std::string const & GetName() const;
bool IsHousesRead() const;
void FinishReadingHouses();
HouseProjection const * GetHousePivot(bool isOdd, bool & sign) const;
// Exchanges the street list and the length with |s|.
void Swap(MergedStreet & s)
{
m_cont.swap(s.m_cont);
std::swap(m_length, s.m_length);
}
// Returns the index produced by applying Next() to a zero index.
Index Begin() const
{
Index i;
Next(i);
return i;
}
// Moves |i| one house forward and lets Next() adjust it.
void Inc(Index & i) const
{
++i.h;
Next(i);
}
// The end position is the one whose street index equals the number of streets.
bool IsEnd(Index const & i) const { return i.s == m_cont.size(); }
HouseProjection const & Get(Index const & i) const
{
ASSERT(!IsEnd(i), ());
return m_cont[i.s]->m_houses[i.h];
}
// Raw pointers to the parts of this street.
// NOTE(review): ownership is not visible here - presumably the streets are owned elsewhere; confirm.
std::deque<Street *> m_cont;
private:
void Erase(Index & i);
void Next(Index & i) const;
double m_length;
};
// A found house together with the merged street it was found on.
// Both pointers are stored as given; nothing here manages their lifetime.
struct HouseResult
{
HouseResult(House const * house, MergedStreet const * street) : m_house(house), m_street(street) {}
// Ordering and equality compare the House pointers only; |m_street| is ignored.
bool operator<(HouseResult const & a) const { return m_house < a.m_house; }
bool operator==(HouseResult const & a) const { return m_house == a.m_house; }
// Returns the position of the house.
m2::PointD const & GetOrg() const { return m_house->GetPosition(); }
House const * m_house;
MergedStreet const * m_street;
};
// Loads streets by feature ids, merges them, reads houses around them and
// looks houses up by number.
// NOTE(review): the maps below hold raw Street * / House * and the class has
// a user-declared destructor - presumably it owns these objects; confirm in the .cpp file.
class HouseDetector
{
public:
using StreetMap = std::map<FeatureID, Street *>;
using HouseMap = std::map<FeatureID, House *>;
using StreetPtr = std::pair<Street *, bool>;
// Default value of |offsetMeters| for ReadAllHouses().
static int const DEFAULT_OFFSET_M = 200;
explicit HouseDetector(DataSource const & dataSource);
~HouseDetector();
int LoadStreets(std::vector<FeatureID> const & ids);
/// @return number of different joined streets.
int MergeStreets();
void ReadAllHouses(double offsetMeters = DEFAULT_OFFSET_M);
// Appends the houses found for |houseNumber| to |res|.
// NOTE(review): "appends" is an assumption - confirm whether |res| is cleared first.
void GetHouseForName(std::string const & houseNumber, std::vector<HouseResult> & res);
void ClearCaches();
void ClearUnusedStreets(std::vector<FeatureID> const & ids);
private:
StreetPtr FindConnection(Street const * st, bool beg) const;
void MergeStreets(Street * st);
template <typename ProjectionCalculator>
void ReadHouse(FeatureType & f, Street * st, ProjectionCalculator & calc);
void ReadHouses(Street * st);
void SetMetersToMercator(double factor);
double GetApprLengthMeters(int index) const;
FeatureLoader m_loader;
StreetMap m_id2st;
HouseMap m_id2house;
std::vector<std::pair<m2::PointD, Street *>> m_end2st;
std::vector<MergedStreet> m_streets;
double m_metersToMercator = 0.0;
int m_streetNum = 0;
double m_houseOffsetM = 0.0;
};
std::string DebugPrint(HouseProjection const & p);
std::string DebugPrint(HouseResult const & r);
} // namespace search

View file

@ -0,0 +1,637 @@
#include "search/house_numbers_matcher.hpp"
#include "indexer/string_set.hpp"
#include <algorithm>
#include <iterator>
#include <limits>
#include <sstream>
#include <boost/iterator/transform_iterator.hpp>
using boost::make_transform_iterator;
namespace search
{
namespace house_numbers
{
using namespace std;
using namespace strings;
namespace
{
// Common strings in house numbers.
// To get this list, just run:
//
// ./clusterize-tag-values.lisp house-number-strings path-to-taginfo-db.db > strings.txt
// cat strings.txt |
// awk '{ if ($1 >= 100 && length($3) != 0) { printf("\"%s\",\n", $3) } }' |
// sort | uniq
//
// *NOTE* there is a list of exceptions at the end.
/// @todo By VNG: This list looks hilarious :) Definitely should set some lower bound number
/// to filter very exotic entries in addr:housenumber.
// Removed street keywords for now and ALL one-letter strings. It is sensitive for search speed, because:
// LooksLikeHouseNumber -> MatchBuildingsWithStreets -> *heavy* StreetVicinityLoader::GetStreet
// "av", "avenida",
// "ca", "cal", "calle", "carrera", "court",
// "da", "de", "di".
// "ga",
// "ł", "la",
// "ne",
// "pa", "par", "park", "plaza",
// "rd", "ro", "road",
// "so", "south", "st", "street",
// "vi",
// "way", "we", "west",
// NOTE(review): several entries after "участок" appear here as empty "" literals. They look like
// characters lost in an encoding round-trip - confirm against the generated list, since
// StringsMatcher adds every entry of this array (including empty ones) to its set.
char const * g_strings[] = {
"aa", "ab", "abc", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj", "ak", "al", "am", "an", "ao", "ap", "aq", "ar",
"are", "as", "at", "au", "aw", "ax", "ay", "az", "azm", "ba", "bab", "bah", "bak", "bb", "bc", "bd", "be", "bedr",
"ben", "bf", "bg", "bh", "bij", "bis", "bk", "bl", "bldg", "blk", "bloc", "block", "bloco", "blok", "bm", "bmn",
"bn", "bo", "boe", "bol", "bor", "bov", "box", "bp", "br", "bra", "brc", "bs", "bsa", "bu", "building", "bv", "bwn",
"bx", "by", "cab", "cat", "cbi", "cbu", "cc", "ccz", "cd", "ce", "centre", "cfn", "cgc", "cjg", "cl", "club",
"cottage", "cottages", "cso", "cum", "db", "dd", "df", "dia", "dvu", "ec", "ee", "eh", "em", "en", "esm", "ev",
"fdo", "fer", "ff", "flat", "flats", "floor", "gar", "gara", "gas", "gb", "gg", "gr", "grg", "ha", "haus", "hh",
"hl", "ho", "house", "hr", "hs", "hv", "ii", "iii", "int", "iv", "ix", "jab", "jf", "jj", "jms", "jtg", "ka", "kab",
"kk", "kmb", "kmk", "knn", "koy", "kp", "kra", "ksn", "kud", "ldo", "ll", "local", "loja", "lot", "lote", "lsb",
"lt", "mac", "mad", "mah", "mak", "mat", "mb", "mbb", "mbn", "mch", "mei", "mks", "mm", "mny", "mo", "mok", "mor",
"msb", "mtj", "mtk", "mvd", "na", "ncc", "nij", "nn", "no", "nr", "nst", "nu", "nut", "of", "ofof", "old", "one",
"oo", "opl", "pa", "pap", "pav", "pb", "pch", "pg", "ph", "phd", "pkf", "plot", "po", "pos", "pp", "pr", "pra",
"pya", "qq", "quater", "ra", "rbo", "rear", "reisach", "rk", "rm", "rosso", "rs", "rw", "sab", "sal", "sav", "sb",
"sba", "sbb", "sbl", "sbn", "sbx", "sc", "sch", "sco", "seb", "sep", "sf", "sgr", "sir", "sj", "sl", "sm", "sn",
"snc", "som", "sp", "spi", "spn", "ss", "sta", "stc", "std", "stiege", "suite", "sur", "tam", "ter", "terrace",
"tf", "th", "the", "tl", "to", "torre", "tr", "traf", "trd", "ts", "tt", "tu", "uhm", "unit", "utc", "vii", "wa",
"wf", "wink", "wrh", "ws", "wsb", "xx", "za", "zh", "zona", "zu", "zw", "א", "ב", "ג", "α", "бб", "бл", "вл", "вх",
"лит", "разр", "стр", "тп", "уч", "участок", "", "丁目", "", "", "",
// List of exceptions
"владение"};
// Common strings in house numbers.
// To get this list, just run:
//
// ./clusterize-tag-values.lisp house-number path-to-taginfo-db.db > numbers.txt
// tail -n +2 numbers.txt | head -78 | sed 's/^.*) \(.*\) \[.*$/"\1"/g;s/[ -/]//g;s/$/,/' |
// sort | uniq
vector<string> const g_patterns = {"BL", "BLN", "BLNSL", "BN", "BNL", "BNSL", "L", "LL", "LN", "LNL", "LNLN", "LNN",
"N", "NBL", "NBLN", "NBN", "NBNBN", "NBNL", "NL", "NLBN", "NLL", "NLLN", "NLN",
"NLNL", "NLS", "NLSN", "NN", "NNBN", "NNL", "NNLN", "NNN", "NNS", "NS", "NSN", "NSS",
"S", "SL", "SLL", "SLN", "SN", "SNBNSS", "SNL", "SNN", "SS", "SSN", "SSS", "SSSS",
// List of exceptions
"NNBNL"};
// List of patterns which look like house numbers more than other patterns. Constructed by hand.
vector<string> const g_patternsStrict = {"N", "NBN", "NBL", "NL"};
// List of common synonyms for building parts. Constructed by hand.
char const * g_buildingPartSynonyms[] = {"building", "bldg", "bld", "bl", "unit", "block", "blk", "корпус",
"корп", "кор", "литер", "лит", "строение", "стр", "блок", "бл"};
// List of common stop words for buildings. Constructed by hand.
UniString const g_stopWords[] = {MakeUniString("дом"), MakeUniString("house"), MakeUniString("д")};
// Returns true when |s| equals one of |g_stopWords| or, if |isPrefix| is set,
// when some stop word starts with |s|.
bool IsStopWord(UniString const & s, bool isPrefix)
{
  for (auto const & stopWord : g_stopWords)
  {
    bool const matches = isPrefix ? StartsWith(stopWord, s) : (stopWord == s);
    if (matches)
      return true;
  }
  return false;
}
// Set of the strings from |g_buildingPartSynonyms| with an exact-match query.
class BuildingPartSynonymsMatcher
{
public:
using Synonyms = StringSet<UniChar, 4>;
// Adds every synonym, converted to UniString, to the set.
BuildingPartSynonymsMatcher()
{
for (auto const & s : g_buildingPartSynonyms)
{
UniString const us = MakeUniString(s);
m_synonyms.Add(us.begin(), us.end());
}
}
// Returns true if |s| looks like a building synonym.
// Only a full match counts; a prefix of a synonym does not.
inline bool Has(UniString const & s) const { return m_synonyms.Has(s.begin(), s.end()) == Synonyms::Status::Full; }
private:
Synonyms m_synonyms;
};
// Set of strings that may occur inside house numbers: everything from
// |g_strings| plus the building part synonyms.
class StringsMatcher
{
public:
  using Strings = StringSet<UniChar, 8>;

  StringsMatcher()
  {
    // Adds all strings of |list| to the set, first |g_strings|, then the synonyms.
    auto const addAll = [this](auto const & list)
    {
      for (auto const & str : list)
      {
        UniString const us = MakeUniString(str);
        m_strings.Add(us.begin(), us.end());
      }
    };
    addAll(g_strings);
    addAll(g_buildingPartSynonyms);
  }

  // Returns true when |s| may be a full substring of a house number,
  // or a prefix of some valid substring of a house number, when
  // |isPrefix| is true.
  bool Has(UniString const & s, bool isPrefix) const
  {
    auto const status = m_strings.Has(s.begin(), s.end());
    if (status == Strings::Status::Absent)
      return false;
    if (status == Strings::Status::Prefix)
      return isPrefix;
    if (status == Strings::Status::Full)
      return true;
    UNREACHABLE();
  }

private:
  Strings m_strings;
};
// Decides whether a string looks like a house number by tokenizing it and
// matching the sequence of token types against a set of known patterns.
class HouseNumberClassifier
{
public:
using Patterns = StringSet<Token::Type, 4>;
// Every pattern is a string over the letters handled by CharToType()
// (N - number, S - string, B - building part, L - letter, U - building part or letter).
HouseNumberClassifier(vector<string> const & patterns = g_patterns)
{
for (auto const & p : patterns)
m_patterns.Add(make_transform_iterator(p.begin(), &CharToType), make_transform_iterator(p.end(), &CharToType));
}
// Returns true when the string |s| looks like a valid house number,
// (or a prefix of some valid house number, when |isPrefix| is
// true).
bool LooksGood(UniString const & s, bool isPrefix) const
{
TokensT parse;
Tokenize(s, isPrefix, parse);
// Compacts |parse| in place: |j| is the read position, |i| is the write
// position. Only tokens that take part in the pattern matching are kept.
size_t i = 0;
for (size_t j = 0; j != parse.size(); ++j)
{
auto const & token = parse[j];
auto const type = token.m_type;
switch (type)
{
// Separators of all kinds are dropped.
case Token::TYPE_SEPARATOR: break;
case Token::TYPE_GROUP_SEPARATOR: break;
case Token::TYPE_HYPHEN: break;
case Token::TYPE_SLASH: break;
case Token::TYPE_STRING:
{
// Stop words are dropped; any other string unknown to |m_matcher| rejects the whole input.
if (IsStopWord(token.m_value, token.m_prefix))
break;
if (!m_matcher.Has(token.m_value, token.m_prefix))
return false;
[[fallthrough]];
}
case Token::TYPE_LETTER:
{
// A stop word is dropped here only when it is the very first token.
if (j == 0 && IsStopWord(token.m_value, token.m_prefix))
break;
[[fallthrough]];
}
case Token::TYPE_NUMBER:
case Token::TYPE_BUILDING_PART:
case Token::TYPE_BUILDING_PART_OR_LETTER:
// Keep the token.
parse[i] = std::move(parse[j]);
ASSERT(!parse[i].m_value.empty(), ());
++i;
}
}
parse.resize(i);
auto const status = m_patterns.Has(make_transform_iterator(parse.begin(), &TokenToType),
make_transform_iterator(parse.end(), &TokenToType));
// Note: a sequence that is only a prefix of a pattern is accepted regardless of |isPrefix|.
switch (status)
{
case Patterns::Status::Absent: return false;
case Patterns::Status::Prefix: return true;
case Patterns::Status::Full: return true;
}
UNREACHABLE();
}
private:
// Converts a pattern letter to the token type; any other character fails the CHECK.
static Token::Type CharToType(char c)
{
switch (c)
{
case 'N': return Token::TYPE_NUMBER;
case 'S': return Token::TYPE_STRING;
case 'B': return Token::TYPE_BUILDING_PART;
case 'L': return Token::TYPE_LETTER;
case 'U': return Token::TYPE_BUILDING_PART_OR_LETTER;
default: CHECK(false, ("Unexpected character:", c)); return Token::TYPE_SEPARATOR;
}
UNREACHABLE();
}
static Token::Type TokenToType(Token const & token) { return token.m_type; }
StringsMatcher m_matcher;
Patterns m_patterns;
};
// Classifies a single character: ASCII digits are numbers, then come plain
// separators, group separators, '-' and '/'; everything else is a string character.
Token::Type GetCharType(UniChar c)
{
  static UniString const kSeps = MakeUniString(" \t\"\\().#~");
  static UniString const kGroupSeps = MakeUniString(",|;+");

  auto const isOneOf = [c](UniString const & chars) { return find(chars.begin(), chars.end(), c) != chars.end(); };

  if (IsASCIIDigit(c))
    return Token::TYPE_NUMBER;
  if (isOneOf(kSeps))
    return Token::TYPE_SEPARATOR;
  if (isOneOf(kGroupSeps))
    return Token::TYPE_GROUP_SEPARATOR;

  switch (c)
  {
  case '-': return Token::TYPE_HYPHEN;
  case '/': return Token::TYPE_SLASH;
  default: return Token::TYPE_STRING;
  }
}
// Returns true for the token types made of letters: strings, single letters
// and the ambiguous "building part or letter" type.
bool IsLiteralType(Token::Type type)
{
  switch (type)
  {
  case Token::TYPE_STRING:
  case Token::TYPE_LETTER:
  case Token::TYPE_BUILDING_PART_OR_LETTER: return true;
  default: return false;
  }
}
// Normalizes a parse for matching:
//
// * drops the last token when it is a prefix token;
// * when there is at least one number, drops everything before the first
//   number, keeps only numbers and letters and sorts all kept tokens
//   except the first one;
// * when there are no numbers at all, sorts all tokens.
void SimplifyParse(TokensT & tokens)
{
  if (!tokens.empty() && tokens.back().m_prefix)
    tokens.pop_back();

  // Find the first number.
  size_t src = 0;
  while (src != tokens.size() && tokens[src].m_type != Token::TYPE_NUMBER)
    ++src;

  // Compact numbers and letters, starting from that number, to the front.
  size_t kept = 0;
  for (; src != tokens.size(); ++src)
  {
    auto const type = tokens[src].m_type;
    if (type != Token::TYPE_NUMBER && type != Token::TYPE_LETTER)
      continue;
    tokens[kept] = tokens[src];
    ++kept;
  }

  if (kept == 0)
  {
    // No numbers: nothing was compacted, sort the tokens as they are.
    sort(tokens.begin(), tokens.end());
    return;
  }

  tokens.resize(kept);
  sort(tokens.begin() + 1, tokens.end());
}
// Returns true when a sequence denoted by [b2, e2) is a subsequence
// of [b1, e1). Elements of the first range are skipped only while they are
// less than the wanted element, so both ranges are expected to be sorted.
template <typename T1, typename T2>
bool IsSubsequence(T1 b1, T1 e1, T2 b2, T2 e2)
{
  while (b2 != e2)
  {
    // Skip the elements that are smaller than the one we are looking for.
    for (; b1 != e1; ++b1)
    {
      if (!(*b1 < *b2))
        break;
    }
    if (b1 == e1)
      return false;
    if (*b1 != *b2)
      return false;
    ++b1;
    ++b2;
  }
  return true;
}
// Returns true when |s| exactly matches one of the building part synonyms.
bool IsBuildingPartSynonym(UniString const & s)
{
// The matcher is built once, on the first call.
static BuildingPartSynonymsMatcher const kMatcher;
return kMatcher.Has(s);
}
// Returns true when |t| is one of the one-letter building part synonyms.
bool IsShortBuildingSynonym(UniString const & t)
{
  static UniString const kSynonyms[] = {MakeUniString("к"), MakeUniString("с")};
  return std::find(std::begin(kSynonyms), std::end(kSynonyms), t) != std::end(kSynonyms);
}
// Calls |fn|(begin, end) for every maximal non-empty run of tokens in |ts|
// that contains no group separators; [begin, end) are indices into |ts|.
template <typename Fn>
void ForEachGroup(TokensT const & ts, Fn && fn)
{
  auto const isGroupSeparator = [&ts](size_t pos) { return ts[pos].m_type == Token::TYPE_GROUP_SEPARATOR; };

  size_t groupBeg = 0;
  while (groupBeg < ts.size())
  {
    if (isGroupSeparator(groupBeg))
    {
      ++groupBeg;
      continue;
    }

    // |groupBeg| is the first token of a group, look for its end.
    size_t groupEnd = groupBeg + 1;
    while (groupEnd < ts.size() && !isGroupSeparator(groupEnd))
      ++groupEnd;

    fn(groupBeg, groupEnd);
    groupBeg = groupEnd;
  }
}
// Classifies a string token and passes it to |fn| as one or two typed tokens:
//
// * a building part synonym -> BUILDING_PART;
// * "лит" followed by one character -> BUILDING_PART + LETTER;
// * two characters starting with a short building synonym -> BUILDING_PART + LETTER;
// * one character -> BUILDING_PART_OR_LETTER for a short building synonym, LETTER otherwise;
// * anything else -> STRING.
template <typename Fn>
void TransformString(UniString && token, Fn && fn)
{
  static UniString const kLiter = MakeUniString("лит");
  size_t const size = token.size();

  if (IsBuildingPartSynonym(token))
  {
    fn(std::move(token), Token::TYPE_BUILDING_PART);
    return;
  }

  if (size == 4 && StartsWith(token, kLiter))
  {
    fn(UniString(token.begin(), token.begin() + 3), Token::TYPE_BUILDING_PART);
    fn(UniString(token.begin() + 3, token.end()), Token::TYPE_LETTER);
    return;
  }

  if (size == 2)
  {
    UniString head(token.begin(), token.begin() + 1);
    if (!IsShortBuildingSynonym(head))
    {
      fn(std::move(token), Token::TYPE_STRING);
      return;
    }
    fn(std::move(head), Token::TYPE_BUILDING_PART);
    fn(UniString(token.begin() + 1, token.end()), Token::TYPE_LETTER);
    return;
  }

  if (size == 1)
  {
    // The type is decided before the token is moved from.
    auto const type = IsShortBuildingSynonym(token) ? Token::TYPE_BUILDING_PART_OR_LETTER : Token::TYPE_LETTER;
    fn(std::move(token), type);
    return;
  }

  fn(std::move(token), Token::TYPE_STRING);
}
} // namespace
// Converts a non-empty string of ASCII digits to a number.
// The input is validated by ASSERTs only: it must be non-empty, consist of
// ASCII digits and be short enough to fit into uint64_t.
uint64_t ToUInt(UniString const & s)
{
uint64_t res = 0;
uint64_t pow = 1;
// Index of the last (least significant) digit.
int i = int(s.size()) - 1;
ASSERT(i >= 0 && i < std::numeric_limits<uint64_t>::digits10, (i));
// Accumulate digits from the least significant one, |pow| is the weight of the current digit.
for (; i >= 0; --i)
{
ASSERT(IsASCIIDigit(s[i]), (s[i]));
res += (s[i] - '0') * pow;
pow *= 10;
}
return res;
}
// Splits the lower-cased |s| into typed tokens and appends them to |ts|.
// A token is a maximal run of characters of the same GetCharType(); runs of
// plain separators produce no tokens. When |isPrefix| is true, the string run
// that ends |s| is treated as possibly incomplete.
void Tokenize(UniString s, bool isPrefix, TokensT & ts)
{
MakeLowerCaseInplace(s);
auto addToken = [&ts](UniString && value, Token::Type type) { ts.emplace_back(std::move(value), type); };
size_t i = 0;
while (i < s.size())
{
// [i, j) is the maximal run of characters of the same type.
Token::Type const type = GetCharType(s[i]);
size_t j = i + 1;
while (j < s.size() && GetCharType(s[j]) == type)
++j;
if (type != Token::TYPE_SEPARATOR)
{
UniString token(s.begin() + i, s.begin() + j);
if (type == Token::TYPE_STRING)
{
if (j != s.size() || !isPrefix)
{
// A complete string: classify it (it may be split into two tokens).
TransformString(std::move(token), addToken);
}
else if (i + 1 == j)
{
// A single trailing character of a prefix query is taken as a letter.
ts.emplace_back(std::move(token), Token::TYPE_LETTER);
}
else
{
// A longer trailing string of a prefix query is kept as is and marked as a prefix.
ts.emplace_back(std::move(token), Token::TYPE_STRING);
ts.back().m_prefix = true;
}
}
else
{
addToken(std::move(token), type);
}
}
i = j;
}
// Quite hacky loop from ts.size() - 1 towards 0.
// It relies on unsigned wrap-around: for an empty |ts| the initial index is
// already >= ts.size(), and decrementing index 0 wraps and ends the loop.
// Going backwards lets every ambiguous token see the resolved type of its successor.
for (size_t i = ts.size() - 1; i < ts.size(); --i)
{
if (ts[i].m_type != Token::TYPE_BUILDING_PART_OR_LETTER)
continue;
// Last token or followed by a building part -> letter; followed by a number ->
// building part; otherwise the type is left ambiguous.
if (i + 1 == ts.size() || ts[i + 1].m_type == Token::TYPE_BUILDING_PART)
ts[i].m_type = Token::TYPE_LETTER;
else if (ts[i + 1].m_type == Token::TYPE_NUMBER)
ts[i].m_type = Token::TYPE_BUILDING_PART;
}
}
// Tokenizes |s| and appends the resulting parse(s) to |parses|.
//
// When every token group reported by ForEachGroup is either a single NUMBER or
// a NUMBER followed by a literal-type token, each group becomes a separate
// parse. Otherwise the whole token sequence is appended as one parse.
// Every newly appended parse is then passed through SimplifyParse.
void ParseHouseNumber(UniString const & s, vector<TokensT> & parses)
{
  TokensT tokens;
  Tokenize(s, false /* isPrefix */, tokens);

  bool numbersSequence = true;
  ForEachGroup(tokens, [&tokens, &numbersSequence](size_t i, size_t j)
  {
    size_t const groupSize = j - i;
    if (groupSize == 0)
      return;

    if (groupSize > 2 || tokens[i].m_type != Token::TYPE_NUMBER)
    {
      numbersSequence = false;
      return;
    }

    if (groupSize == 2 && !IsLiteralType(tokens[i + 1].m_type))
      numbersSequence = false;
  });

  size_t const firstNewParse = parses.size();
  if (!numbersSequence)
  {
    parses.emplace_back(std::move(tokens));
  }
  else
  {
    ForEachGroup(tokens, [&tokens, &parses](size_t i, size_t j)
    {
      TokensT & group = parses.emplace_back();
      for (size_t k = i; k < j; ++k)
        group.emplace_back(std::move(tokens[k]));
    });
  }

  for (size_t idx = firstNewParse; idx < parses.size(); ++idx)
    SimplifyParse(parses[idx]);
}
// Parses a part of a search query that may be a house number.
// Tokens produced by Tokenize (which appends to |parse|) are then simplified
// in place by SimplifyParse. |queryIsPrefix| is forwarded to Tokenize as is.
void ParseQuery(UniString const & query, bool queryIsPrefix, TokensT & parse)
{
Tokenize(query, queryIsPrefix, parse);
SimplifyParse(parse);
}
// Returns true when |houseNumber| matches the already parsed query |queryParse|.
// Both arguments must be non-empty (checked by ASSERT only).
//
// A parse of |houseNumber| matches when its first token equals the first query
// token and the remaining tokens of one side form a subsequence of the
// remaining tokens of the other side.
bool HouseNumbersMatch(UniString const & houseNumber, TokensT const & queryParse)
{
  ASSERT(!houseNumber.empty() && !queryParse.empty(), ());

  // Fast pre-check: two different leading digits can never match, so the
  // comparatively expensive parsing below can be skipped.
  if (IsASCIIDigit(houseNumber[0]))
  {
    auto const queryFirstChar = queryParse[0].m_value[0];
    if (IsASCIIDigit(queryFirstChar) && houseNumber[0] != queryFirstChar)
      return false;
  }

  vector<TokensT> houseNumberParses;
  ParseHouseNumber(houseNumber, houseNumberParses);

  Token const & queryHead = queryParse[0];
  for (auto & parse : houseNumberParses)
  {
    if (parse.empty() || parse[0] != queryHead)
      continue;

    bool const parseTailInQuery =
        IsSubsequence(parse.begin() + 1, parse.end(), queryParse.begin() + 1, queryParse.end());
    if (parseTailInQuery)
      return true;

    bool const queryTailInParse =
        IsSubsequence(queryParse.begin() + 1, queryParse.end(), parse.begin() + 1, parse.end());
    if (queryTailInParse)
      return true;
  }
  return false;
}
// Matches |queryParse| against a house number that may be written as
// "conscription number / street number". When a '/' is present, each side of
// the first '/' is matched independently and a match of either side is enough.
// Without a '/', this is the same as HouseNumbersMatch.
bool HouseNumbersMatchConscription(UniString const & houseNumber, TokensT const & queryParse)
{
  auto const beg = houseNumber.begin();
  auto const end = houseNumber.end();
  auto const slash = std::find(beg, end, '/');
  if (slash == end)
    return HouseNumbersMatch(houseNumber, queryParse);

  // HouseNumbersMatch requires a non-empty house number (it reads its first
  // character), so an empty side - as in "12/" or "/12" - is skipped instead
  // of being forwarded.
  auto const matchPart = [&queryParse](auto partBegin, auto partEnd)
  {
    if (partBegin == partEnd)
      return false;
    return HouseNumbersMatch(UniString(partBegin, partEnd), queryParse);
  };

  // Conscription number / street number.
  return matchPart(beg, slash) || matchPart(slash + 1, end);
}
bool HouseNumbersMatchRange(std::string_view const & hnRange, TokensT const & queryParse,
feature::InterpolType interpol)
{
ASSERT(!queryParse.empty() && interpol != feature::InterpolType::None, ());
if (queryParse[0].m_type != Token::TYPE_NUMBER)
return false;
uint64_t const val = ToUInt(queryParse[0].m_value);
bool const isEven = (val % 2 == 0);
if (interpol == feature::InterpolType::Odd && isEven)
return false;
if (interpol == feature::InterpolType::Even && !isEven)
return false;
// Generator makes valid normalized values.
size_t const i = hnRange.find(':');
if (i == std::string_view::npos)
{
ASSERT(false, (hnRange));
return false;
}
uint64_t left, right;
if (!strings::to_uint(hnRange.substr(0, i), left) || !strings::to_uint(hnRange.substr(i + 1), right))
{
ASSERT(false, (hnRange));
return false;
}
return left < val && val < right;
}
// Returns true when |s| looks like a house number according to a
// default-constructed HouseNumberClassifier. |isPrefix| is forwarded to the
// classifier unchanged.
bool LooksLikeHouseNumber(UniString const & s, bool isPrefix)
{
// Function-local static: the classifier is built once and reused by later calls.
static HouseNumberClassifier const classifier;
return classifier.LooksGood(s, isPrefix);
}
// Convenience overload: converts |s| with MakeUniString and forwards to the
// UniString overload.
bool LooksLikeHouseNumber(string const & s, bool isPrefix)
{
return LooksLikeHouseNumber(MakeUniString(s), isPrefix);
}
// Same check as LooksLikeHouseNumber, but the classifier is built from
// g_patternsStrict and |s| is never treated as a prefix.
bool LooksLikeHouseNumberStrict(UniString const & s)
{
// Function-local static: the classifier is built once and reused by later calls.
static HouseNumberClassifier const classifier(g_patternsStrict);
return classifier.LooksGood(s, false /* isPrefix */);
}
// Convenience overload: converts |s| with MakeUniString and forwards to the
// UniString overload.
bool LooksLikeHouseNumberStrict(string const & s)
{
return LooksLikeHouseNumberStrict(MakeUniString(s));
}
// Returns a human-readable name of |type| for logs and tests.
// Values outside of the enumeration are printed as "Unknown".
string DebugPrint(Token::Type type)
{
  char const * name = "Unknown";
  switch (type)
  {
  case Token::TYPE_NUMBER: name = "Number"; break;
  case Token::TYPE_SEPARATOR: name = "Separator"; break;
  case Token::TYPE_GROUP_SEPARATOR: name = "GroupSeparator"; break;
  case Token::TYPE_HYPHEN: name = "Hyphen"; break;
  case Token::TYPE_SLASH: name = "Slash"; break;
  case Token::TYPE_STRING: name = "String"; break;
  case Token::TYPE_BUILDING_PART: name = "BuildingPart"; break;
  case Token::TYPE_LETTER: name = "Letter"; break;
  case Token::TYPE_BUILDING_PART_OR_LETTER: name = "BuildingPartOrLetter"; break;
  }
  return name;
}
// Formats |token| as "Token [<value>, <type>]" for logs and tests.
string DebugPrint(Token const & token)
{
  ostringstream out;
  out << "Token [";
  out << DebugPrint(token.m_value);
  out << ", ";
  out << DebugPrint(token.m_type);
  out << "]";
  return out.str();
}
} // namespace house_numbers
} // namespace search

View file

@ -0,0 +1,88 @@
#pragma once
#include "indexer/feature_utils.hpp"
#include "base/string_utils.hpp"
#include <string>
#include <utility>
#include <vector>
namespace search
{
namespace house_numbers
{
// A single lexical unit of a house number or of a house-number-like query part:
// the matched text (|m_value|) together with its classification (|m_type|).
struct Token
{
// Classification of a token. The enumerator order matters: operator< below
// compares types by their numeric value.
enum Type
{
TYPE_NUMBER,
TYPE_SEPARATOR,
TYPE_GROUP_SEPARATOR,
TYPE_HYPHEN,
TYPE_SLASH,
TYPE_STRING,
TYPE_BUILDING_PART,
TYPE_LETTER,
TYPE_BUILDING_PART_OR_LETTER
};
Token() = default;
Token(strings::UniString const & value, Type type) : m_value(value), m_type(type) {}
Token(strings::UniString && value, Type type) : m_value(std::move(value)), m_type(type) {}
// NOTE: a move constructor is declared but a copy constructor is not, so copy
// construction is implicitly deleted, while copy assignment (below) is available.
Token(Token &&) = default;
Token & operator=(Token &&) = default;
Token & operator=(Token const &) = default;
// Equality and ordering take only |m_type| and |m_value| into account;
// |m_prefix| is ignored.
bool operator==(Token const & rhs) const { return m_type == rhs.m_type && m_value == rhs.m_value; }
bool operator!=(Token const & rhs) const { return !(*this == rhs); }
// Orders by type first, then by value.
bool operator<(Token const & rhs) const
{
if (m_type != rhs.m_type)
return m_type < rhs.m_type;
return m_value < rhs.m_value;
}
// Text of the token.
strings::UniString m_value;
// Classification of |m_value|.
Type m_type = TYPE_SEPARATOR;
// Set by Tokenize for the trailing multi-character string run of a prefix query.
bool m_prefix = false;
};
using TokensT = std::vector<Token>;
// Used to convert Token::Type::TYPE_NUMBER into int value.
uint64_t ToUInt(strings::UniString const & s);
// Tokenizes |s| that may be a house number.
void Tokenize(strings::UniString s, bool isPrefix, TokensT & ts);
// Parses a string that can be one or more house numbers. This method
// can be used to parse addr:housenumber fields.
void ParseHouseNumber(strings::UniString const & s, std::vector<TokensT> & parses);
// Parses a part of search query that can be a house number.
void ParseQuery(strings::UniString const & query, bool queryIsPrefix, TokensT & parse);
/// @return true if the house number matches the given parsed query.
/// @{
bool HouseNumbersMatch(strings::UniString const & houseNumber, TokensT const & queryParse);
bool HouseNumbersMatchConscription(strings::UniString const & houseNumber, TokensT const & queryParse);
bool HouseNumbersMatchRange(std::string_view const & hnRange, TokensT const & queryParse,
feature::InterpolType interpol);
/// @}
// Returns true if |s| looks like a house number.
bool LooksLikeHouseNumber(strings::UniString const & s, bool isPrefix);
bool LooksLikeHouseNumber(std::string const & s, bool isPrefix);
bool LooksLikeHouseNumberStrict(strings::UniString const & s);
bool LooksLikeHouseNumberStrict(std::string const & s);
std::string DebugPrint(Token::Type type);
std::string DebugPrint(Token const & token);
} // namespace house_numbers
} // namespace search

View file

@ -0,0 +1,156 @@
#include "search/house_to_street_table.hpp"
#include "indexer/mwm_set.hpp"
#include "platform/mwm_traits.hpp"
#include "coding/files_container.hpp"
#include "coding/map_uint32_to_val.hpp"
#include "coding/reader.hpp"
#include "coding/varint.hpp"
#include "coding/writer.hpp"
#include "base/assert.hpp"
#include "base/checked_cast.hpp"
#include "base/logging.hpp"
#include "defines.hpp"
#include <vector>
namespace search
{
using namespace std;
namespace
{
// HouseToStreetTable implementation backed by a MapUint32ToValue<uint32_t>
// that is loaded from the given reader.
class EliasFanoMap : public HouseToStreetTable
{
public:
using Map = MapUint32ToValue<uint32_t>;
// Takes ownership of |reader| (must be non-null) and loads the map from it.
explicit EliasFanoMap(unique_ptr<Reader> && reader) : m_reader(std::move(reader))
{
ASSERT(m_reader, ());
// Decodes one block of values: the first value is stored as VarUint, every
// following value as a signed VarInt delta from the previous one.
// Decoding stops early when the source has no more bytes.
// NOTE(review): values[0] is written unconditionally, so |blockSize| is
// presumably never 0 - confirm against Map::Load.
auto readBlockCallback = [](auto & source, uint32_t blockSize, vector<uint32_t> & values)
{
values.resize(blockSize);
values[0] = ReadVarUint<uint32_t>(source);
for (size_t i = 1; i < blockSize && source.Size() > 0; ++i)
{
// Feature ids for all real features are less than numeric_limits<int32_t>::max()
// so we can use delta coding with int32_t difference type.
values[i] = base::asserted_cast<uint32_t>(values[i - 1] + ReadVarInt<int32_t>(source));
}
};
m_map = Map::Load(*m_reader, readBlockCallback);
// NOTE(review): a null map is only caught by this debug ASSERT; Get()
// dereferences |m_map| unconditionally.
ASSERT(m_map.get(), ());
}
// HouseToStreetTable overrides:
// Returns the value stored for |houseId|, tagged as StreetIdType::FeatureId,
// or an empty optional when the map has no entry for |houseId|.
std::optional<Result> Get(uint32_t houseId) const override
{
uint32_t fID;
if (!m_map->Get(houseId, fID))
return {};
return {{fID, StreetIdType::FeatureId}};
}
private:
// Kept as a member for the lifetime of the table; presumably |m_map| keeps
// reading from it after Load - TODO confirm.
unique_ptr<Reader> m_reader;
unique_ptr<Map> m_map;
};
// Fallback HouseToStreetTable that contains no entries: every lookup returns
// an empty optional.
class DummyTable : public HouseToStreetTable
{
public:
// HouseToStreetTable overrides:
std::optional<Result> Get(uint32_t /* houseId */) const override { return {}; }
};
// Loads the table stored under section |tag| of the mwm container |value|.
//
// The mwm must use the HouseToStreetTableWithHeader format and the section
// header must have version V2; both are enforced with CHECKs. When the section
// cannot be opened (Reader::OpenException), the error is logged and an empty
// DummyTable is returned instead. The result is never null.
unique_ptr<HouseToStreetTable> LoadHouseTableImpl(MwmValue const & value, std::string const & tag)
{
  try
  {
    auto const format = version::MwmTraits(value.GetMwmVersion()).GetHouseToStreetTableFormat();
    CHECK_EQUAL(format, version::MwmTraits::HouseToStreetTableFormat::HouseToStreetTableWithHeader, ());

    FilesContainerR::TReader sectionReader = value.m_cont.GetReader(tag);

    HouseToStreetTable::Header header;
    ReaderSource headerSource(sectionReader);
    header.Read(headerSource);
    CHECK(header.m_version == HouseToStreetTable::Version::V2, ());

    // The table itself occupies [m_tableOffset, m_tableOffset + m_tableSize) of the section.
    auto tableReader = sectionReader.GetPtr()->CreateSubReader(header.m_tableOffset, header.m_tableSize);
    CHECK(tableReader, ());
    return make_unique<EliasFanoMap>(std::move(tableReader));
  }
  catch (Reader::OpenException const & e)
  {
    LOG(LERROR, (e.Msg()));
  }

  return make_unique<DummyTable>();
}
} // namespace
// Loads the table stored in the FEATURE2STREET_FILE_TAG section of |value|.
// See LoadHouseTableImpl for the fallback behaviour.
std::unique_ptr<HouseToStreetTable> LoadHouseToStreetTable(MwmValue const & value)
{
return LoadHouseTableImpl(value, FEATURE2STREET_FILE_TAG);
}
// Loads the table stored in the FEATURE2PLACE_FILE_TAG section of |value|.
// See LoadHouseTableImpl for the fallback behaviour.
std::unique_ptr<HouseToStreetTable> LoadHouseToPlaceTable(MwmValue const & value)
{
return LoadHouseTableImpl(value, FEATURE2PLACE_FILE_TAG);
}
// HouseToStreetTableBuilder -----------------------------------------------------------------------
// Records the mapping |houseId| -> |streetId| in the underlying map builder.
void HouseToStreetTableBuilder::Put(uint32_t houseId, uint32_t streetId)
{
m_builder.Put(houseId, streetId);
}
// Serializes the table to |writer|: a header, padding, then the map itself.
// The current writer position must be 8-byte aligned (enforced by CHECK).
// The header is written twice: first as a placeholder, and again at the end
// once the table offset and size are known. The writer is left positioned
// after the table.
void HouseToStreetTableBuilder::Freeze(Writer & writer) const
{
uint64_t const startOffset = writer.Pos();
CHECK(coding::IsAlign8(startOffset), ());
HouseToStreetTable::Header header;
// Placeholder header; rewritten below with the final offset and size.
header.Serialize(writer);
uint64_t bytesWritten = writer.Pos();
coding::WritePadding(writer, bytesWritten);
// Each street id is encoded as delta from some prediction.
// First street id in the block encoded as VarUint, all other street ids in the block
// encoded as VarInt delta from previous id
auto const writeBlockCallback = [](auto & w, auto begin, auto end)
{
CHECK(begin != end, ());
WriteVarUint(w, *begin);
auto prevIt = begin;
for (auto it = begin + 1; it != end; ++it)
{
int32_t const delta = base::asserted_cast<int32_t>(*it) - *prevIt;
WriteVarInt(w, delta);
prevIt = it;
}
};
// Offset and size are stored relative to |startOffset|, i.e. to the header start.
header.m_tableOffset = base::asserted_cast<uint32_t>(writer.Pos() - startOffset);
m_builder.Freeze(writer, writeBlockCallback);
header.m_tableSize = base::asserted_cast<uint32_t>(writer.Pos() - header.m_tableOffset - startOffset);
// Go back, overwrite the placeholder header, then restore the end position.
auto const endOffset = writer.Pos();
writer.Seek(startOffset);
header.Serialize(writer);
writer.Seek(endOffset);
}
} // namespace search

View file

@ -0,0 +1,25 @@
#pragma once
#include "indexer/house_to_street_iface.hpp"
#include "coding/map_uint32_to_val.hpp"
#include <memory>
class MwmValue;
class Writer;
namespace search
{
std::unique_ptr<HouseToStreetTable> LoadHouseToStreetTable(MwmValue const & value);
std::unique_ptr<HouseToStreetTable> LoadHouseToPlaceTable(MwmValue const & value);
// Accumulates house -> street id pairs and serializes them in the format
// read back by LoadHouseToStreetTable / LoadHouseToPlaceTable.
class HouseToStreetTableBuilder
{
public:
  // Records the mapping |houseId| -> |streetId|.
  // Parameter names match the definition in the .cpp file.
  void Put(uint32_t houseId, uint32_t streetId);

  // Writes the header and the accumulated table to |writer|.
  // The current position of |writer| must be 8-byte aligned.
  void Freeze(Writer & writer) const;

private:
  MapUint32ToValueBuilder<uint32_t> m_builder;
};
} // namespace search

24
libs/search/idf_map.cpp Normal file
View file

@ -0,0 +1,24 @@
#include "search/idf_map.hpp"
#include "base/assert.hpp"
namespace search
{
// Stores a reference to |delegate| (it is not copied, so it must outlive this
// object) and the IDF value to use for tokens with zero documents.
// |unknownIdf| must be positive (checked by ASSERT only).
IdfMap::IdfMap(Delegate const & delegate, double unknownIdf) : m_delegate(delegate), m_unknownIdf(unknownIdf)
{
ASSERT_GREATER(m_unknownIdf, 0.0, ());
}
// Returns the IDF of |s|, using |idfs| as a cache.
// On a cache miss the document count is requested from the delegate; the IDF
// is 1 / count, or |m_unknownIdf| when the count is zero. The computed value
// is stored in |idfs| before being returned.
double IdfMap::GetImpl(Map & idfs, strings::UniString const & s, bool isPrefix)
{
  auto const cached = idfs.find(s);
  if (cached != idfs.cend())
    return cached->second;

  auto const numDocs = static_cast<double>(m_delegate.GetNumDocs(s, isPrefix));
  double const idf = (numDocs != 0) ? 1.0 / numDocs : m_unknownIdf;
  idfs.emplace(s, idf);
  return idf;
}
} // namespace search

38
libs/search/idf_map.hpp Normal file
View file

@ -0,0 +1,38 @@
#pragma once
#include "base/string_utils.hpp"
#include <cstdint>
#include <map>
namespace search
{
// Caches inverse document frequencies of tokens. Document counts come from a
// Delegate; full tokens and prefix tokens are cached separately.
class IdfMap
{
public:
// Source of document counts.
struct Delegate
{
virtual ~Delegate() = default;
// Returns the number of documents for |token|; |isPrefix| tells whether
// |token| is a full token or a prefix.
virtual uint64_t GetNumDocs(strings::UniString const & token, bool isPrefix) const = 0;
};
// |delegate| is stored by reference and must outlive this object.
// |unknownIdf| is returned for tokens whose document count is zero.
IdfMap(Delegate const & delegate, double unknownIdf);
// Returns the (cached) IDF of |s|. Not const: may add an entry to the cache.
double Get(strings::UniString const & s, bool isPrefix)
{
return GetImpl(isPrefix ? m_prefixIdfs : m_fullIdfs, s, isPrefix);
}
private:
using Map = std::map<strings::UniString, double>;
// Looks |s| up in |idfs|, computing and caching the value on a miss.
double GetImpl(Map & idfs, strings::UniString const & s, bool isPrefix);
// Cache for full tokens.
Map m_fullIdfs;
// Cache for prefix tokens.
Map m_prefixIdfs;
Delegate const & m_delegate;
double m_unknownIdf;
};
} // namespace search

View file

@ -0,0 +1,349 @@
#include "search/intermediate_result.hpp"
#include "search/reverse_geocoder.hpp"
#include "storage/country_info_getter.hpp"
#include "indexer/classificator.hpp"
#include "indexer/feature.hpp"
#include "indexer/feature_algo.hpp"
#include "indexer/feature_utils.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "indexer/road_shields_parser.hpp"
#include "platform/localization.hpp"
#include "platform/measurement_utils.hpp"
#include "base/string_utils.hpp"
#include <algorithm>
#include "3party/opening_hours/opening_hours.hpp"
namespace search
{
using namespace std;
namespace
{
// Holds the classificator types ("place-continent" and "place-country") for
// which RankerResult::GetCountryId does not report a country id.
class SkipRegionInfo
{
  static size_t constexpr kCount = 2;
  uint32_t m_types[kCount];

public:
  // Resolves the type paths into classificator type ids.
  SkipRegionInfo()
  {
    base::StringIL arr[] = {{"place", "continent"}, {"place", "country"}};
    static_assert(kCount == ARRAY_SIZE(arr), "");

    Classificator const & c = classif();
    size_t idx = 0;
    for (auto const & path : arr)
      m_types[idx++] = c.GetTypeByPath(path);
  }

  // Returns true when |type| is one of the stored types.
  bool IsSkip(uint32_t type) const
  {
    uint32_t const * const last = m_types + kCount;
    return std::find(m_types, last, type) != last;
  }
};
} // namespace
// PreRankerResult ---------------------------------------------------------------------------------
// Creates a first-pass result for feature |id| (must be valid, checked by ASSERT).
// |provenance| is used to detect whether the result came from the Relaxed
// branch; it is stored only when SEARCH_USE_PROVENANCE is defined.
PreRankerResult::PreRankerResult(FeatureID const & id, PreRankingInfo const & info,
vector<ResultTracer::Branch> const & provenance)
: m_id(id)
, m_info(info)
, m_isRelaxed(base::IsExist(provenance, ResultTracer::Branch::Relaxed))
#ifdef SEARCH_USE_PROVENANCE
, m_provenance(provenance)
#endif
{
ASSERT(m_id.IsValid(), ());
// Total number of matched tokens = sum of the sizes of all token ranges.
m_matchedTokensNumber = 0;
for (auto const & r : m_info.m_tokenRanges)
m_matchedTokensNumber += r.Size();
}
// static
// Orders results by rank (higher first), then by popularity (higher first)
// and finally by distance to the pivot (closer first).
bool PreRankerResult::LessRankAndPopularity(PreRankerResult const & lhs, PreRankerResult const & rhs)
{
  auto const & l = lhs.m_info;
  auto const & r = rhs.m_info;

  if (l.m_rank != r.m_rank)
    return l.m_rank > r.m_rank;

  if (l.m_popularity != r.m_popularity)
    return l.m_popularity > r.m_popularity;

  /// @todo Remove this epilog when we will have _enough_ ranks and popularities in data.
  return l.m_distanceToPivot < r.m_distanceToPivot;
}
// static
// Orders results by distance to the pivot, closer results first.
bool PreRankerResult::LessDistance(PreRankerResult const & lhs, PreRankerResult const & rhs)
{
return lhs.m_info.m_distanceToPivot < rhs.m_info.m_distanceToPivot;
}
// static
// Three-way comparison by the quality of the token match.
// Returns -1 when |lhs| is better, 1 when |rhs| is better and 0 on a tie.
// Criteria, in order: not a common-match-only result, longer innermost token
// range, more matched tokens in total, earlier start of the innermost range.
int PreRankerResult::CompareByTokensMatch(PreRankerResult const & lhs, PreRankerResult const & rhs)
{
  auto const verdict = [](bool lhsIsBetter) { return lhsIsBetter ? -1 : 1; };

  if (lhs.m_info.m_isCommonMatchOnly != rhs.m_info.m_isCommonMatchOnly)
    return verdict(rhs.m_info.m_isCommonMatchOnly);

  auto const & lRange = lhs.m_info.InnermostTokenRange();
  auto const & rRange = rhs.m_info.InnermostTokenRange();

  if (lRange.Size() != rRange.Size())
    return verdict(lRange.Size() > rRange.Size());

  if (lhs.m_matchedTokensNumber != rhs.m_matchedTokensNumber)
    return verdict(lhs.m_matchedTokensNumber > rhs.m_matchedTokensNumber);

  if (lRange.Begin() != rRange.Begin())
    return verdict(lRange.Begin() < rRange.Begin());

  return 0;
}
// static
// Results that are an exact match using all query tokens go first; results
// equal in that respect are ordered by CompareByTokensMatch.
bool PreRankerResult::LessByExactMatch(PreRankerResult const & lhs, PreRankerResult const & rhs)
{
  auto const isFullExactMatch = [](PreRankerResult const & r)
  { return r.m_info.m_exactMatch && r.m_info.m_allTokensUsed; };

  bool const lExact = isFullExactMatch(lhs);
  bool const rExact = isFullExactMatch(rhs);
  return lExact != rExact ? lExact : CompareByTokensMatch(lhs, rhs) == -1;
}
// Ordering used for category results.
// - When the position is inside the viewport: closer results first.
// - Otherwise, at a detailed scale, results whose center is inside the
//   viewport go before those outside of it.
// - Remaining ties are broken by popularity, more popular first.
bool PreRankerResult::CategoriesComparator::operator()(PreRankerResult const & lhs, PreRankerResult const & rhs) const
{
  if (m_positionIsInsideViewport)
    return lhs.GetDistance() < rhs.GetDistance();

  if (m_detailedScale)
  {
    bool const lhsVisible = m_viewport.IsPointInside(lhs.GetInfo().m_center);
    bool const rhsVisible = m_viewport.IsPointInside(rhs.GetInfo().m_center);
    if (lhsVisible != rhsVisible)
      return lhsVisible;
  }

  return lhs.GetPopularity() > rhs.GetPopularity();
}
// Formats |r| for logs and tests: feature index, matched tokens number,
// relaxed flag and the pre-ranking info.
std::string DebugPrint(PreRankerResult const & r)
{
  ostringstream out;
  out << "PreRankerResult "
      << "{ FID: " << r.GetId().m_index;  // index is enough here for debug purpose
  out << "; m_matchedTokensNumber: " << r.m_matchedTokensNumber;
  out << "; m_isRelaxed: " << r.m_isRelaxed << "; ";
  out << DebugPrint(r.m_info) << " }";
  return out.str();
}
// RankerResult ------------------------------------------------------------------------------------
// Feature / Building result. The result type is Building when the feature's
// types satisfy IsBuildingChecker, Feature otherwise. |displayName| becomes the
// result name, |fileName| and |center| are stored as the region parameters.
RankerResult::RankerResult(FeatureType & ft, m2::PointD const & center, string displayName, string const & fileName)
: m_types(ft)
, m_str(std::move(displayName))
, m_id(ft.GetID())
, m_resultType(ftypes::IsBuildingChecker::Instance()(m_types) ? Type::Building : Type::Feature)
, m_geomType(ft.GetGeomType())
{
ASSERT(m_id.IsValid(), ());
ASSERT(!m_types.Empty(), ());
m_types.SortBySpec();
m_region.SetParams(fileName, center);
FillDetails(ft, m_str, m_details);
}
// Same as above, with the center computed via feature::GetCenter
// (WORST_GEOMETRY) and the feature's readable name used as the display name.
RankerResult::RankerResult(FeatureType & ft, std::string const & fileName)
: RankerResult(ft, feature::GetCenter(ft, FeatureType::WORST_GEOMETRY), std::string(ft.GetReadableName()), fileName)
{}
// Lat/lon result: the name is the formatted coordinates in parentheses,
// the region has an empty country id and the mercator point of (lat, lon).
RankerResult::RankerResult(double lat, double lon)
: m_str("(" + measurement_utils::FormatLatLon(lat, lon) + ")")
, m_resultType(Type::LatLon)
{
m_region.SetParams({}, mercator::FromLatLon(lat, lon));
}
// Postcode result: the name is |postcode| with ASCII letters upper-cased,
// the region has an empty country id and the point |coord|.
RankerResult::RankerResult(m2::PointD const & coord, string_view postcode)
: m_str(postcode)
, m_resultType(Type::Postcode)
{
strings::AsciiToUpper(m_str);
m_region.SetParams({}, coord);
}
// Fills |countryId| with the country of this result and returns true on success.
// Always returns false for the types listed in SkipRegionInfo
// (continents and countries).
bool RankerResult::GetCountryId(storage::CountryInfoGetter const & infoGetter, uint32_t ftype,
                                storage::CountryId & countryId) const
{
  static SkipRegionInfo const checker;
  return !checker.IsSkip(ftype) && m_region.GetCountryId(infoGetter, countryId);
}
// Returns true when both results have the same geometry type, the same
// ranking-info type and the same name.
bool RankerResult::IsEqualBasic(RankerResult const & r) const
{
  if (m_geomType != r.m_geomType)
    return false;
  if (GetRankingInfo().m_type != r.GetRankingInfo().m_type)
    return false;
  return m_str == r.m_str;
}
// Returns true when the results are equal in the IsEqualBasic sense and also
// have the same best type.
bool RankerResult::IsEqualCommon(RankerResult const & r) const
{
return (IsEqualBasic(r) && GetBestType() == r.GetBestType());
}
// Returns true when the result's types satisfy IsStreetOrSquareChecker.
bool RankerResult::IsStreet() const
{
return ftypes::IsStreetOrSquareChecker::Instance()(m_types);
}
// Returns the most relevant type of the result.
// When |preferredTypes| (must be sorted) is given, the first of the result's
// types that is present in it wins. Otherwise, or when none is present, the
// best type reported by the types holder is returned.
uint32_t RankerResult::GetBestType(vector<uint32_t> const * preferredTypes /* = nullptr */) const
{
  if (!preferredTypes)
    return m_types.GetBestType();

  auto const & preferred = *preferredTypes;
  ASSERT(is_sorted(preferred.begin(), preferred.end()), ());

  for (uint32_t type : m_types)
  {
    if (binary_search(preferred.begin(), preferred.end(), type))
      return type;
  }
  return m_types.GetBestType();
}
// RankerResult::RegionInfo ------------------------------------------------------------------------
// Fills |countryId| and returns true when the country is known.
// The stored country id is used when it is not empty; otherwise the country is
// looked up by the stored point via |infoGetter|.
bool RankerResult::RegionInfo::GetCountryId(storage::CountryInfoGetter const & infoGetter,
                                            storage::CountryId & countryId) const
{
  if (m_countryId.empty())
  {
    auto const found = infoGetter.GetRegionCountryId(m_point);
    if (found == storage::kInvalidCountryId)
      return false;
    countryId = found;
  }
  else
  {
    countryId = m_countryId;
  }
  return true;
}
// Functions ---------------------------------------------------------------------------------------
// Fills |details| for the feature |ft|: open-now state with minutes until
// open / closed, and a description joined from several metadata-derived fields.
// |name| is the display name of the result; a brand that is already contained
// in it is not repeated in the description.
// Does nothing when |details| is already initialized.
void FillDetails(FeatureType & ft, std::string const & name, Result::Details & details)
{
if (details.m_isInitialized)
return;
std::string_view airportIata = ft.GetMetadata(feature::Metadata::FMD_AIRPORT_IATA);
std::string brand{ft.GetMetadata(feature::Metadata::FMD_BRAND)};
if (!brand.empty())
{
brand = platform::GetLocalizedBrandName(brand);
// Drop the brand when the name already mentions it.
if (name.find(brand) != std::string::npos)
brand.clear();
}
/// @todo Avoid temporary string when OpeningHours (boost::spirit) will allow string_view.
std::string const openHours(ft.GetMetadata(feature::Metadata::FMD_OPEN_HOURS));
if (!openHours.empty())
{
using namespace osmoh;
// NOTE(review): |openHours| is already a std::string, so the explicit copy
// below looks redundant - confirm against the OpeningHours constructors.
OpeningHours const oh((std::string(openHours)));
if (oh.IsValid())
{
/// @todo We should check closed/open time for specific feature's timezone.
time_t const now = time(nullptr);
auto const info = oh.GetInfo(now);
if (info.state != RuleState::Unknown)
{
// In else case value is osm::Unknown, it's set in preview's constructor.
details.m_isOpenNow = (info.state == RuleState::Open) ? osm::Yes : osm::No;
// Seconds to minutes.
details.m_minutesUntilOpen = (info.nextTimeOpen - now) / 60;
details.m_minutesUntilClosed = (info.nextTimeClosed - now) / 60;
}
}
}
feature::TypesHolder const typesHolder(ft);
// Stars are shown for hotels only, and only when the metadata value parses as a number.
std::string stars;
uint8_t starsCount = 0;
bool const isHotel = ftypes::IsHotelChecker::Instance()(typesHolder);
if (isHotel && strings::to_uint(ft.GetMetadata(feature::Metadata::FMD_STARS), starsCount))
stars = feature::FormatStars(starsCount);
auto const cuisines = feature::GetLocalizedCuisines(typesHolder);
auto const cuisine = strings::JoinStrings(cuisines, feature::kFieldsSeparator);
auto const recycling =
strings::JoinStrings(feature::GetLocalizedRecyclingTypes(typesHolder), feature::kFieldsSeparator);
auto const roadShields = ftypes::GetRoadShieldsNames(ft);
auto const roadShield = strings::JoinStrings(roadShields, feature::kFieldsSeparator);
auto const fee = feature::GetLocalizedFeeType(typesHolder);
auto const elevation = feature::FormatElevation(ft.GetMetadata(feature::Metadata::FMD_ELE));
std::string description;
// Appends a non-empty field to the description, separated by kFieldsSeparator.
auto const append = [&description](std::string_view sv)
{
if (sv.empty())
return;
if (!description.empty())
description += feature::kFieldsSeparator;
description += sv;
};
// The order of the calls below defines the order of fields in the description.
append(stars);
append(airportIata);
append(roadShield);
append(brand);
append(elevation);
append(cuisine);
append(recycling);
append(fee);
details.m_description = std::move(description);
details.m_isInitialized = true;
}
// Formats |r| for logs and tests: feature index, name, readable best type and
// linear model rank, followed by the provenance (only when
// SEARCH_USE_PROVENANCE is defined and it is not empty) and by the ranking info.
string DebugPrint(RankerResult const & r)
{
stringstream ss;
ss << "RankerResult "
<< "{ FID: " << r.GetID().m_index // index is enough here for debug purpose
<< "; Name: " << r.GetName() << "; Type: " << classif().GetReadableObjectName(r.GetBestType())
<< "; Linear model rank: " << r.GetLinearModelRank();
#ifdef SEARCH_USE_PROVENANCE
if (!r.m_provenance.empty())
ss << "; Provenance: " << ::DebugPrint(r.m_provenance);
#endif
// Prefer the full debug ranking info when it is available.
if (r.m_dbgInfo)
ss << "; " << DebugPrint(*r.m_dbgInfo);
else
ss << "; " << DebugPrint(r.GetRankingInfo());
ss << " }";
return ss.str();
}
} // namespace search

View file

@ -0,0 +1,194 @@
#pragma once
#include "search/pre_ranking_info.hpp"
#include "search/ranking_info.hpp"
#include "search/result.hpp"
#include "search/tracer.hpp"
#include "storage/storage_defines.hpp"
#include "indexer/feature_data.hpp"
#include "geometry/point2d.hpp"
#include <string>
#include <vector>
class FeatureType;
namespace storage
{
class CountryInfoGetter;
struct CountryInfo;
} // namespace storage
namespace search
{
class ReverseGeocoder;
// First pass results class. Objects are created during search in trie.
// Works fast because it does not load features.
class PreRankerResult
{
public:
// |provenance| is the list of Geocoder branches that led to this result; it
// is always used to compute the relaxed flag and is stored only when
// SEARCH_USE_PROVENANCE is defined.
PreRankerResult(FeatureID const & id, PreRankingInfo const & info,
std::vector<ResultTracer::Branch> const & provenance);
/// @name Compare functions.
/// @return true (-1) if lhs is better (less in sort) than rhs.
/// @{
static bool LessRankAndPopularity(PreRankerResult const & lhs, PreRankerResult const & rhs);
static bool LessDistance(PreRankerResult const & lhs, PreRankerResult const & rhs);
static int CompareByTokensMatch(PreRankerResult const & lhs, PreRankerResult const & rhs);
static bool LessByExactMatch(PreRankerResult const & lhs, PreRankerResult const & rhs);
/// @}
// Comparator for category results; see operator() in the .cpp file for the
// exact ordering rules.
struct CategoriesComparator
{
bool operator()(PreRankerResult const & lhs, PreRankerResult const & rhs) const;
m2::RectD m_viewport;
bool m_positionIsInsideViewport = false;
bool m_detailedScale = false;
};
FeatureID const & GetId() const { return m_id; }
double GetDistance() const { return m_info.m_distanceToPivot; }
uint8_t GetRank() const { return m_info.m_rank; }
uint8_t GetPopularity() const { return m_info.m_popularity; }
PreRankingInfo const & GetInfo() const { return m_info; }
#ifdef SEARCH_USE_PROVENANCE
std::vector<ResultTracer::Branch> const & GetProvenance() const { return m_provenance; }
#endif
// size_t GetInnermostTokensNumber() const { return m_info.InnermostTokenRange().Size(); }
// size_t GetMatchedTokensNumber() const { return m_matchedTokensNumber; }
bool IsNotRelaxed() const { return !m_isRelaxed; }
// Returns true for relaxed results and for results that matched fewer than
// |queryTokensNumber| - 1 tokens (written as an addition so that the
// unsigned comparison cannot underflow).
bool SkipForViewportSearch(size_t queryTokensNumber) const
{
return m_isRelaxed || m_matchedTokensNumber + 1 < queryTokensNumber;
}
void SetRank(uint8_t rank) { m_info.m_rank = rank; }
void SetPopularity(uint8_t popularity) { m_info.m_popularity = popularity; }
void SetDistanceToPivot(double distance) { m_info.m_distanceToPivot = distance; }
// Stores the feature center and marks it as loaded.
void SetCenter(m2::PointD const & center)
{
m_info.m_center = center;
m_info.m_centerLoaded = true;
}
friend std::string DebugPrint(PreRankerResult const & r);
private:
FeatureID m_id;
PreRankingInfo m_info;
// Sum of the sizes of all matched token ranges; computed in the constructor.
size_t m_matchedTokensNumber;
// True when the provenance contains the Relaxed branch.
bool m_isRelaxed;
#ifdef SEARCH_USE_PROVENANCE
// The call path in the Geocoder that leads to this result.
std::vector<ResultTracer::Branch> m_provenance;
#endif
};
// Second result class. Objects are created during reading of features.
// Read and fill needed info for ranking and getting final results.
class RankerResult
{
public:
  enum class Type : uint8_t
  {
    LatLon = 0,
    Feature,
    Building,  //!< Buildings are not filtered out in duplicates filter.
    Postcode
  };

  /// For Type::Feature and Type::Building.
  RankerResult(FeatureType & ft, m2::PointD const & center, std::string displayName, std::string const & fileName);
  RankerResult(FeatureType & ft, std::string const & fileName);

  /// For Type::LatLon.
  RankerResult(double lat, double lon);

  /// For Type::Postcode.
  RankerResult(m2::PointD const & coord, std::string_view postcode);

  // Returns true when the result's types describe a street or a square.
  bool IsStreet() const;

  StoredRankingInfo const & GetRankingInfo() const { return m_info; }

  // Stores |info| and caches its linear model rank for the given mode.
  void SetRankingInfo(RankingInfo const & info, bool viewportMode)
  {
    m_finalRank = info.GetLinearModelRank(viewportMode);
    m_info = info;
  }

  FeatureID const & GetID() const { return m_id; }
  std::string const & GetName() const { return m_str; }
  feature::TypesHolder const & GetTypes() const { return m_types; }
  Type GetResultType() const { return m_resultType; }
  m2::PointD GetCenter() const { return m_region.m_point; }
  feature::GeomType GetGeomType() const { return m_geomType; }
  Result::Details GetDetails() const { return m_details; }

  double GetDistanceToPivot() const { return m_info.m_distanceToPivot; }
  // Rank cached by SetRankingInfo(); 0.0 until it is called.
  double GetLinearModelRank() const { return m_finalRank; }

  // Fills |countryId| and returns true when the country of the result is known
  // and |ftype| is not one of the types for which no country is reported.
  bool GetCountryId(storage::CountryInfoGetter const & infoGetter, uint32_t ftype,
                    storage::CountryId & countryId) const;

  // Equality by geometry type, ranking-info type and name.
  bool IsEqualBasic(RankerResult const & r) const;
  // IsEqualBasic() plus equal best types.
  bool IsEqualCommon(RankerResult const & r) const;

  // |preferredTypes|, when given, must be sorted.
  uint32_t GetBestType(std::vector<uint32_t> const * preferredTypes = nullptr) const;

#ifdef SEARCH_USE_PROVENANCE
  std::vector<ResultTracer::Branch> const & GetProvenance() const { return m_provenance; }
#endif

  friend std::string DebugPrint(RankerResult const & r);

private:
  friend class RankerResultMaker;
  friend class Ranker;

  // Country id (may be empty) and location of the result.
  struct RegionInfo
  {
    storage::CountryId m_countryId;
    m2::PointD m_point;

    void SetParams(storage::CountryId const & countryId, m2::PointD const & point)
    {
      m_countryId = countryId;
      m_point = point;
    }

    // Uses |m_countryId| when it is not empty, otherwise looks the country up by |m_point|.
    bool GetCountryId(storage::CountryInfoGetter const & infoGetter, storage::CountryId & countryId) const;
  };

  RegionInfo m_region;
  feature::TypesHolder m_types;
  std::string m_str;
  Result::Details m_details;

  StoredRankingInfo m_info;
  std::shared_ptr<RankingInfo> m_dbgInfo;  // used in debug logs and tests, nullptr in production

  FeatureID m_id;
  // No constructor sets the rank, so it is given a defined value here; without
  // it GetLinearModelRank() and DebugPrint() would read an indeterminate value
  // for results that never went through SetRankingInfo().
  double m_finalRank = 0.0;

  Type m_resultType;
  feature::GeomType m_geomType = feature::GeomType::Undefined;

#ifdef SEARCH_USE_PROVENANCE
  // The call path in the Geocoder that leads to this result.
  std::vector<ResultTracer::Branch> m_provenance;
#endif
};
void FillDetails(FeatureType & ft, std::string const & name, Result::Details & details);
} // namespace search

View file

@ -0,0 +1,74 @@
#include "search/intersection_result.hpp"
#include "base/assert.hpp"
#include <sstream>
namespace search
{
// static
// Out-of-class definition of the constexpr member declared in the header.
// NOTE(review): presumably needed only for pre-C++17 builds, where odr-use of
// a static constexpr member requires a namespace-scope definition - confirm
// the language standard before removing.
uint32_t const IntersectionResult::kInvalidId;
// Stores |id| in the field that corresponds to |type|.
// TYPE_CITY is accepted but ignored; village, state, country, unclassified and
// TYPE_COUNT are not supported and trigger an ASSERT.
void IntersectionResult::Set(Model::Type type, uint32_t id)
{
switch (type)
{
case Model::TYPE_SUBPOI: m_subpoi = id; break;
case Model::TYPE_COMPLEX_POI: m_complexPoi = id; break;
case Model::TYPE_BUILDING: m_building = id; break;
case Model::TYPE_STREET: m_street = id; break;
case Model::TYPE_SUBURB: m_suburb = id; break;
/// @todo Store city (place) name for ranking? I suspect that it should work fine now, without it.
case Model::TYPE_CITY: break;
case Model::TYPE_VILLAGE:
case Model::TYPE_STATE:
case Model::TYPE_COUNTRY:
case Model::TYPE_UNCLASSIFIED:
case Model::TYPE_COUNT: ASSERT(false, ("Unsupported type.")); break;
}
}
// Returns the first valid id in the order SUBPOI, COMPLEX_POI, BUILDING,
// STREET, SUBURB, or kInvalidId when none of them is set.
uint32_t IntersectionResult::InnermostResult() const
{
  uint32_t const candidates[] = {m_subpoi, m_complexPoi, m_building, m_street, m_suburb};
  for (uint32_t const id : candidates)
  {
    if (id != kInvalidId)
      return id;
  }
  return kInvalidId;
}
// Resets every stored feature id to kInvalidId.
void IntersectionResult::Clear()
{
  m_subpoi = m_complexPoi = m_building = m_street = m_suburb = kInvalidId;
}
// Formats |result| for logs and tests; only the valid ids are printed.
std::string DebugPrint(IntersectionResult const & result)
{
  std::ostringstream out;
  out << "IntersectionResult [ ";

  auto const printIfValid = [&out](char const * label, uint32_t id)
  {
    if (id != IntersectionResult::kInvalidId)
      out << label << id << " ";
  };

  printIfValid("SUBPOI:", result.m_subpoi);
  printIfValid("COMPLEX_POI:", result.m_complexPoi);
  printIfValid("BUILDING:", result.m_building);
  printIfValid("STREET:", result.m_street);
  printIfValid("SUBURB:", result.m_suburb);

  out << "]";
  return out.str();
}
} // namespace search

View file

@ -0,0 +1,42 @@
#pragma once
#include "search/model.hpp"
#include <cstdint>
#include <limits>
#include <string>
namespace search
{
// This class holds higher-level features for an intersection result,
// i.e. BUILDING and STREET for POI or STREET for BUILDING.
struct IntersectionResult
{
// Marker for "no feature" in any of the fields below.
static uint32_t constexpr kInvalidId = std::numeric_limits<uint32_t>::max();
// Stores |id| in the field that corresponds to |type|.
void Set(Model::Type type, uint32_t id);
// Returns the first valid feature among the [SUBPOI, COMPLEX_POI, BUILDING, STREET, SUBURB],
// or kInvalidId when none is set.
uint32_t InnermostResult() const;
// Returns true when at least one valid feature exists.
inline bool IsValid() const { return InnermostResult() != kInvalidId; }
// Building == Streets means that we have actual street result, but got here
// via _fake_ TYPE_BUILDING layer (see MatchPOIsAndBuildings).
inline bool IsFakeBuildingButStreet() const { return m_building != kInvalidId && m_building == m_street; }
// Returns true when both the sub-POI and the complex POI are set.
inline bool IsPoiAndComplexPoi() const { return m_complexPoi != kInvalidId && m_subpoi != kInvalidId; }
// Clears all fields to an invalid state.
void Clear();
uint32_t m_subpoi = kInvalidId;
uint32_t m_complexPoi = kInvalidId;
uint32_t m_building = kInvalidId;
uint32_t m_street = kInvalidId;
uint32_t m_suburb = kInvalidId;
};
std::string DebugPrint(IntersectionResult const & result);
} // namespace search

View file

@ -0,0 +1,144 @@
#pragma once
#include <algorithm>
#include <set>
#include <utility>
#include <vector>
// todo(@m) Move to search/base?
namespace search
{
// This class represents a set of disjoint intervals in the form
// [begin, end). Note that neighbour intervals are always coalesced,
// so while [0, 1), [1, 2) and [2, 3) are disjoint, after addition to
// the set they will be stored as a single [0, 3).
//
// |Elem| must be copyable and support operator<, operator> and operator==.
template <typename Elem>
class IntervalSet
{
public:
  using Interval = std::pair<Elem, Elem>;

  // Adds an |interval| to the set, coalescing adjacent intervals if needed.
  // Empty and reversed intervals (|first| is not less than |second|) are ignored.
  //
  // Complexity: O(num of intervals intersecting with |interval| +
  // log(total number of intervals)).
  void Add(Interval const & interval);

  // Subtracts set from an |interval| and appends result to
  // |difference|. Existing contents of |difference| are kept.
  //
  // Complexity: O(num of intervals intersecting with |interval| +
  // log(total number of intervals)).
  void SubtractFrom(Interval const & interval, std::vector<Interval> & difference) const;

  // Returns all elements of the set as a set of intervals.
  //
  // Complexity: O(1).
  inline std::set<Interval> const & Elems() const { return m_intervals; }

private:
  // Cover() is a const member, so it can only obtain const iterators; std::set::erase accepts them.
  using Iterator = typename std::set<Interval>::const_iterator;

  // Calculates range of intervals that have non-empty intersection with a given |interval|.
  void Cover(Interval const & interval, Iterator & begin, Iterator & end) const;

  // This is a set of disjoint intervals.
  std::set<Interval> m_intervals;
};

template <typename Elem>
void IntervalSet<Elem>::Add(Interval const & interval)
{
  // Skips empty intervals. Reversed ones are skipped as well: storing them would
  // break the "sorted and disjoint" invariant and could make Cover() return an
  // inverted iterator range, which must never reach erase().
  if (!(interval.first < interval.second))
    return;

  Iterator begin;
  Iterator end;
  Cover(interval, begin, end);

  Elem from = interval.first;
  Elem to = interval.second;

  // Updates |from| and |to| in accordance with corner intervals (if any).
  if (begin != end)
  {
    if (begin->first < from)
      from = begin->first;

    auto last = end;
    --last;
    if (last->second > to)
      to = last->second;
  }

  // Now all elements [from, to) can be added to the set as a single
  // interval which will replace all intervals in [begin, end). But
  // note that it can be possible to merge new interval with its
  // neighbors, so following code checks it.
  if (begin != m_intervals.begin())
  {
    auto prevBegin = begin;
    --prevBegin;
    if (prevBegin->second == from)
    {
      begin = prevBegin;
      from = prevBegin->first;
    }
  }
  if (end != m_intervals.end() && end->first == to)
  {
    to = end->second;
    ++end;
  }

  m_intervals.erase(begin, end);
  m_intervals.emplace(from, to);
}

template <typename Elem>
void IntervalSet<Elem>::SubtractFrom(Interval const & interval, std::vector<Interval> & difference) const
{
  Elem from = interval.first;
  Elem const to = interval.second;

  // Nothing can remain of an empty or reversed interval.
  if (!(from < to))
    return;

  Iterator begin;
  Iterator end;
  Cover(interval, begin, end);

  // Walks the stored intervals left to right; |from| is the first point
  // that is not yet known to be covered by the set.
  for (auto it = begin; it != end && from < to; ++it)
  {
    if (it->first > from)
    {
      // Gap between |from| and the next stored interval belongs to the difference.
      difference.emplace_back(from, it->first);
      from = it->second;
    }
    else
    {
      from = std::max(from, it->second);
    }
  }

  if (from < to)
    difference.emplace_back(from, to);
}

template <typename Elem>
void IntervalSet<Elem>::Cover(Interval const & interval, Iterator & begin, Iterator & end) const
{
  Elem const & from = interval.first;
  Elem const & to = interval.second;

  // First stored interval that starts at or after |from|.
  begin = m_intervals.lower_bound(std::make_pair(from, from));
  if (begin != m_intervals.begin())
  {
    // The previous interval starts before |from| but may still reach into |interval|.
    auto prev = begin;
    --prev;
    if (prev->second > from)
      begin = prev;
  }

  // First stored interval that starts at or after |to|, i.e. the first one that cannot intersect.
  end = m_intervals.lower_bound(std::make_pair(to, to));
}
} // namespace search

View file

@ -0,0 +1,89 @@
#include "keyword_lang_matcher.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/assert.hpp"
#include "base/stl_helpers.hpp"
#include <algorithm>
#include <limits>
#include <sstream>
using namespace std;
namespace search
{
// KeywordLangMatcher::Score ----------------------------------------------------------------------
// Default score: default parent score combined with the lowest possible language score.
KeywordLangMatcher::Score::Score() : m_langScore(std::numeric_limits<int>::min()) {}

// Combines a keyword-matching |score| with a language priority |langScore|.
KeywordLangMatcher::Score::Score(KeywordMatcher::Score const & score, int langScore)
  : m_parentScore(score)
  , m_langScore(langScore)
{}

// Orders by the parent keyword score first, then by language score and finally
// by the parent's token-length tiebreaker.
bool KeywordLangMatcher::Score::operator<(KeywordLangMatcher::Score const & score) const
{
  auto const & mine = m_parentScore;
  auto const & theirs = score.m_parentScore;

  if (mine != theirs)
    return mine < theirs;

  return m_langScore != score.m_langScore ? m_langScore < score.m_langScore : mine.LessInTokensLength(theirs);
}

// a <= b is defined as "b is not less than a".
bool KeywordLangMatcher::Score::operator<=(KeywordLangMatcher::Score const & score) const
{
  bool const otherIsLess = score < *this;
  return !otherIsLess;
}
// KeywordLangMatcher ------------------------------------------------------------------------------
// Creates |maxLanguageTiers| empty tiers.
KeywordLangMatcher::KeywordLangMatcher(size_t maxLanguageTiers) : m_languagePriorities(maxLanguageTiers)
{
  // Should we ever have this many tiers, the idea of storing a vector of vectors must be revised.
  ASSERT_LESS(maxLanguageTiers, 10, ());
}

// Replaces the contents of |tier| with |languages| (moved in).
void KeywordLangMatcher::SetLanguages(size_t tier, std::vector<int8_t> && languages)
{
  ASSERT_LESS(tier, m_languagePriorities.size(), ());

  auto & tierLanguages = m_languagePriorities[tier];
  tierLanguages = std::move(languages);
}
// Returns minus the index of the first tier that contains |lang|, or minus the
// number of tiers when no tier contains it. Greater (closer to zero) is better.
int KeywordLangMatcher::CalcLangScore(int8_t lang) const
{
  int tierIndex = 0;
  for (auto const & tierLanguages : m_languagePriorities)
  {
    if (std::find(tierLanguages.begin(), tierLanguages.end(), lang) != tierLanguages.end())
      return -tierIndex;
    ++tierIndex;
  }

  return -static_cast<int>(m_languagePriorities.size());
}
// All three overloads pair the keyword score of the name with the language score of |lang|.

// Scores a name given as a UTF-8 string view.
KeywordLangMatcher::Score KeywordLangMatcher::CalcScore(int8_t lang, string_view name) const
{
  auto const keywordScore = m_keywordMatcher.CalcScore(name);
  return Score(keywordScore, CalcLangScore(lang));
}

// Scores a name given as a UniString.
KeywordLangMatcher::Score KeywordLangMatcher::CalcScore(int8_t lang, strings::UniString const & name) const
{
  auto const keywordScore = m_keywordMatcher.CalcScore(name);
  return Score(keywordScore, CalcLangScore(lang));
}

// Scores a name that is already split into |count| tokens.
KeywordLangMatcher::Score KeywordLangMatcher::CalcScore(int8_t lang, strings::UniString const * tokens,
                                                        size_t count) const
{
  auto const keywordScore = m_keywordMatcher.CalcScore(tokens, count);
  return Score(keywordScore, CalcLangScore(lang));
}
// Functions ---------------------------------------------------------------------------------------
// Formats |score| as "KLM::Score(<parent score>, LS=<language score>)".
string DebugPrint(KeywordLangMatcher::Score const & score)
{
  ostringstream out;
  out << "KLM::Score(";
  out << DebugPrint(score.m_parentScore);
  out << ", LS=";
  out << score.m_langScore;
  out << ")";
  return out.str();
}
} // namespace search

View file

@ -0,0 +1,64 @@
#pragma once
#include "search/keyword_matcher.hpp"
#include "base/string_utils.hpp"
#include <string>
#include <vector>
namespace search
{
// Scores names against a query the way KeywordMatcher does, and additionally
// takes into account how preferred the language of the name is.
class KeywordLangMatcher
{
public:
// Keyword score extended with a language score; compared with operator< / operator<=.
class Score
{
public:
// Creates a score with the minimal possible language score.
Score();
// Compares the keyword score first, then the language score, then the
// keyword score's token-length tiebreaker.
bool operator<(Score const & s) const;
bool operator<=(Score const & s) const;
private:
friend class KeywordLangMatcher;
friend std::string DebugPrint(Score const & score);
// Only KeywordLangMatcher creates non-default scores.
Score(KeywordMatcher::Score const & score, int langScore);
KeywordMatcher::Score m_parentScore;
// Zero for the most important tier; decreases by one for each following tier.
int m_langScore;
};
// Constructs a matcher that supports up to |maxLanguageTiers| tiers.
// All languages in the same tier are considered equal.
// The lower the tier is, the more important the languages in it are.
explicit KeywordLangMatcher(size_t maxLanguageTiers);
// Defines the languages in the |tier| to be exactly |languages|.
void SetLanguages(size_t const tier, std::vector<int8_t> && languages);
// Calls |fn| on every language in every tier. Does not make a distinction
// between languages in different tiers.
template <typename Fn>
void ForEachLanguage(Fn && fn) const
{
for (auto const & langs : m_languagePriorities)
for (int8_t lang : langs)
fn(lang);
}
// Forwards |query| to the underlying KeywordMatcher, which keeps its own copy
// of the query tokens and prefix.
inline void SetKeywords(QueryString const & query) { m_keywordMatcher.SetKeywords(query); }
// Returns the Score of the name (greater is better).
Score CalcScore(int8_t lang, std::string_view name) const;
Score CalcScore(int8_t lang, strings::UniString const & name) const;
Score CalcScore(int8_t lang, strings::UniString const * tokens, size_t count) const;
private:
// Returns minus the index of the first tier containing |lang|,
// or minus the number of tiers when |lang| is in none of them.
int CalcLangScore(int8_t lang) const;
// m_languagePriorities[tier] is the list of language codes of that tier.
std::vector<std::vector<int8_t>> m_languagePriorities;
KeywordMatcher m_keywordMatcher;
};
} // namespace search

View file

@ -0,0 +1,166 @@
#include "search/keyword_matcher.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/assert.hpp"
#include "base/buffer_vector.hpp"
#include "base/stl_helpers.hpp"
#include <algorithm>
#include <sstream>
namespace search
{
using namespace std;
// Starts with no keywords and no prefix.
KeywordMatcher::KeywordMatcher()
{
  Clear();
}

// Forgets the current query.
void KeywordMatcher::Clear()
{
  m_prefix.clear();
  m_keywords.clear();
}

// Copies the tokens and the prefix of |query| into the matcher.
void KeywordMatcher::SetKeywords(QueryString const & query)
{
  auto const & queryTokens = query.m_tokens;
  m_keywords.assign(queryTokens.begin(), queryTokens.end());
  m_prefix = query.m_prefix;
}
// Normalizes |name| and scores the resulting string.
KeywordMatcher::Score KeywordMatcher::CalcScore(string_view name) const
{
  auto const normalizedName = NormalizeAndSimplifyString(name);
  return CalcScore(normalizedName);
}

// Splits |name| into tokens on search delimiters and scores the token array.
KeywordMatcher::Score KeywordMatcher::CalcScore(strings::UniString const & name) const
{
  buffer_vector<strings::UniString, kMaxNumTokens> nameTokens;
  SplitUniString(name, base::MakeBackInsertFunctor(nameTokens), Delimiters());

  return CalcScore(nameTokens.data(), nameTokens.size());
}
// Scores a name given as |count| tokens against the stored keywords and prefix.
//
// Matching is greedy: keywords are processed in order and each one takes the
// first not yet used name token that is equal to it. The prefix (if any) then
// takes the first remaining name token that starts with it.
KeywordMatcher::Score KeywordMatcher::CalcScore(strings::UniString const * tokens, size_t count) const
{
// Some names can have too many tokens. Trim them.
count = min(count, kMaxNumTokens);
vector<bool> isQueryTokenMatched(m_keywords.size());
vector<bool> isNameTokenMatched(count);
// Accumulates how much the offset (query index - name index) changes from
// one match to the next.
uint32_t sumTokenMatchDistance = 0;
int8_t prevTokenMatchDistance = 0;
// An empty prefix counts as matched.
bool prefixMatched = true;
for (size_t i = 0; i < m_keywords.size(); ++i)
{
// The inner loop stops as soon as keyword |i| gets a match.
for (size_t j = 0; j < count && !isQueryTokenMatched[i]; ++j)
{
if (!isNameTokenMatched[j] && m_keywords[i] == tokens[j])
{
isQueryTokenMatched[i] = isNameTokenMatched[j] = true;
int8_t const tokenMatchDistance = i - j;
sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance);
prevTokenMatchDistance = tokenMatchDistance;
}
}
}
if (!m_prefix.empty())
{
prefixMatched = false;
for (size_t j = 0; j < count && !prefixMatched; ++j)
{
if (!isNameTokenMatched[j] &&
strings::StartsWith(tokens[j].begin(), tokens[j].end(), m_prefix.begin(), m_prefix.end()))
{
isNameTokenMatched[j] = prefixMatched = true;
// The prefix is treated as the query token that follows all keywords.
int8_t const tokenMatchDistance = int(m_keywords.size()) - j;
sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance);
}
}
}
uint8_t numQueryTokensMatched = 0;
for (size_t i = 0; i < isQueryTokenMatched.size(); ++i)
if (isQueryTokenMatched[i])
++numQueryTokensMatched;
Score score;
score.m_fullQueryMatched = prefixMatched && (numQueryTokensMatched == isQueryTokenMatched.size());
score.m_prefixMatched = prefixMatched;
// Note that |prefixMatched| is also true (and counted here) when the prefix is empty.
score.m_numQueryTokensAndPrefixMatched = numQueryTokensMatched + (prefixMatched ? 1 : 0);
score.m_nameTokensMatched = 0;
score.m_nameTokensLength = 0;
for (size_t i = 0; i < count; ++i)
{
// Bit mask of matched name tokens; earlier tokens occupy higher bits.
if (isNameTokenMatched[i])
score.m_nameTokensMatched |= (1 << (kMaxNumTokens - 1 - i));
// The length is summed over all (trimmed) name tokens, matched or not.
score.m_nameTokensLength += tokens[i].size();
}
score.m_sumTokenMatchDistance = sumTokenMatchDistance;
return score;
}
// Creates the "nothing matched" score: all counters are zero and both flags are false.
KeywordMatcher::Score::Score()
  : m_sumTokenMatchDistance(0)
  , m_nameTokensMatched(0)
  , m_nameTokensLength(0)
  , m_numQueryTokensAndPrefixMatched(0)
  , m_fullQueryMatched(false)
  , m_prefixMatched(false)
{}

// Lexicographic comparison, most significant criterion first. A smaller sum of
// token match distances is better, so that criterion is inverted.
bool KeywordMatcher::Score::operator<(KeywordMatcher::Score const & s) const
{
  // For two different flags "this < s" holds exactly when this flag is false.
  if (m_fullQueryMatched != s.m_fullQueryMatched)
    return !m_fullQueryMatched;

  if (m_numQueryTokensAndPrefixMatched != s.m_numQueryTokensAndPrefixMatched)
    return m_numQueryTokensAndPrefixMatched < s.m_numQueryTokensAndPrefixMatched;

  if (m_prefixMatched != s.m_prefixMatched)
    return !m_prefixMatched;

  if (m_nameTokensMatched != s.m_nameTokensMatched)
    return m_nameTokensMatched < s.m_nameTokensMatched;

  return s.m_sumTokenMatchDistance < m_sumTokenMatchDistance;
}

// Equality over exactly the fields that take part in operator<.
bool KeywordMatcher::Score::operator==(KeywordMatcher::Score const & s) const
{
  if (m_sumTokenMatchDistance != s.m_sumTokenMatchDistance)
    return false;
  if (m_nameTokensMatched != s.m_nameTokensMatched)
    return false;
  if (m_numQueryTokensAndPrefixMatched != s.m_numQueryTokensAndPrefixMatched)
    return false;
  if (m_fullQueryMatched != s.m_fullQueryMatched)
    return false;
  return m_prefixMatched == s.m_prefixMatched;
}

// Tiebreaker for fully matched scores: the one with the longer name is "less".
// Returns false when this score is not a full match.
bool KeywordMatcher::Score::LessInTokensLength(Score const & s) const
{
  if (!m_fullQueryMatched)
    return false;

  ASSERT(s.m_fullQueryMatched, ());
  return s.m_nameTokensLength < m_nameTokensLength;
}
// Formats every field of |score| that takes part in comparisons; the
// matched-name-tokens mask is printed bit by bit, highest bit first.
string DebugPrint(KeywordMatcher::Score const & score)
{
  ostringstream out;
  out << "KeywordMatcher::Score("
      << "FQM=" << score.m_fullQueryMatched
      << ",nQTM=" << static_cast<int>(score.m_numQueryTokensAndPrefixMatched)
      << ",PM=" << score.m_prefixMatched
      << ",NTM=";

  for (size_t bit = kMaxNumTokens; bit-- > 0;)
    out << ((score.m_nameTokensMatched >> bit) & 1);

  out << ",STMD=" << score.m_sumTokenMatchDistance << ")";
  return out.str();
}
} // namespace search

View file

@ -0,0 +1,60 @@
#pragma once
#include "search/common.hpp"
#include "base/string_utils.hpp"
#include <string>
#include <vector>
namespace search
{
// Matches feature names against the tokens and the prefix of a query and
// produces a comparable Score.
class KeywordMatcher
{
public:
class Score
{
public:
// Creates a score with all counters zeroed and both flags false.
Score();
// *NOTE* m_nameTokensLength is usually used as a late stage tiebreaker
// and does not take part in the operators.
bool operator<(Score const & s) const;
bool operator==(Score const & s) const;
bool operator!=(Score const & s) const { return !(*this == s); }
// Tiebreaker: for a fully matched score returns true when this name is longer
// than the name of |s|; returns false otherwise.
bool LessInTokensLength(Score const & s) const;
bool IsQueryMatched() const { return m_fullQueryMatched; }
private:
friend class KeywordMatcher;
friend std::string DebugPrint(Score const & score);
// Sum of changes of the (query index - name index) offset between consecutive matches.
uint32_t m_sumTokenMatchDistance;
// Bit mask of the matched name tokens.
uint32_t m_nameTokensMatched;
// Total length of the name tokens.
uint32_t m_nameTokensLength;
// Number of matched keywords, plus one when the prefix is matched.
uint8_t m_numQueryTokensAndPrefixMatched;
// True when all keywords and the prefix are matched.
bool m_fullQueryMatched : 1;
bool m_prefixMatched : 1;
};
KeywordMatcher();
// Drops the stored keywords and prefix.
void Clear();
/// Internal copy of keywords is made.
void SetKeywords(QueryString const & query);
/// @return Score of the name (greater is better).
//@{
Score CalcScore(std::string_view name) const;
Score CalcScore(strings::UniString const & name) const;
Score CalcScore(strings::UniString const * tokens, size_t count) const;
//@}
private:
// Full query tokens and the (possibly empty) last incomplete token.
std::vector<strings::UniString> m_keywords;
strings::UniString m_prefix;
};
} // namespace search

View file

@ -0,0 +1,298 @@
#include "search/latlon_match.hpp"
#include "base/macros.hpp"
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <string>
#include <utility>
using namespace std;
namespace
{
// Characters skipped between a number and its unit mark.
std::string const kSpaces = " \t";
// Separators that may surround hemisphere letters and tokens.
std::string const kCharsToSkip = " \n\t,;:.()";
// Characters accepted as a decimal separator.
std::string const kDecimalMarks = ".,";

// Returns true when |c| is one of the accepted decimal separators.
bool IsDecimalMark(char c)
{
  return kDecimalMarks.find(c) != std::string::npos;
}

// Returns true when |c| is the minus sign.
bool IsNegativeSymbol(char c)
{
  return c == '-';
}

// Returns true for '0'..'9'. Used instead of isdigit(), whose behaviour is
// undefined for negative char values such as the bytes of UTF-8 sequences.
bool IsAsciiDigit(char c)
{
  return c >= '0' && c <= '9';
}

// Advances |s| past spaces and tabs. Stops at the terminating zero.
template <typename Char>
void SkipSpaces(Char *& s)
{
  while (kSpaces.find(*s) != std::string::npos)
    ++s;
}

// Advances |s| past all separator characters. Stops at the terminating zero.
template <typename Char>
void Skip(Char *& s)
{
  while (kCharsToSkip.find(*s) != std::string::npos)
    ++s;
}

// If |s| starts with one of the |count| patterns in |arr|, advances |s| past
// the first such pattern and returns true. Empty patterns never match: they
// would consume nothing and still report success.
bool MatchDMSArray(char const *& s, char const * arr[], size_t count)
{
  for (size_t i = 0; i < count; ++i)
  {
    size_t const len = std::strlen(arr[i]);
    if (len != 0 && std::strncmp(s, arr[i], len) == 0)
    {
      s += len;
      return true;
    }
  }
  return false;
}

// Recognizes a degree/minute/second mark at |s| and skips it.
// Returns 0 for degrees, 1 for minutes, 2 for seconds and -1 when there is no mark.
int GetDMSIndex(char const *& s)
{
  // NOTE(review): the non-ASCII alternatives below were empty strings, which
  // looks like an encoding loss. They are restored as the typographic marks
  // (UTF-8 byte escapes); confirm against the upstream file.
  char const * arrDegree[] = {"*", "°"};
  // Apostrophe, U+2019 RIGHT SINGLE QUOTATION MARK, U+2032 PRIME.
  char const * arrMinutes[] = {"\'", "\xE2\x80\x99", "\xE2\x80\xB2"};
  // Quotation mark, U+201D RIGHT DOUBLE QUOTATION MARK, U+2033 DOUBLE PRIME,
  // and the doubled forms of the minute marks.
  char const * arrSeconds[] = {"\"",     "\xE2\x80\x9D",             "\xE2\x80\xB3",
                               "\'\'",   "\xE2\x80\x99\xE2\x80\x99", "\xE2\x80\xB2\xE2\x80\xB2"};

  if (MatchDMSArray(s, arrDegree, std::size(arrDegree)))
    return 0;
  // Seconds go before minutes, otherwise a doubled minute mark would be read as minutes.
  if (MatchDMSArray(s, arrSeconds, std::size(arrSeconds)))
    return 2;
  if (MatchDMSArray(s, arrMinutes, std::size(arrMinutes)))
    return 1;
  return -1;
}

// Skips separators and then tries to read a hemisphere letter (N, S, E or W, any case).
// On success stores the position of the letter into |arrPos| (index 0..3 in the
// order N, S, E, W), advances |s| past it and returns true.
// Note that |s| stays advanced past the separators even when false is returned.
bool SkipNSEW(char const *& s, char const * (&arrPos)[4])
{
  Skip(s);

  int ind;
  switch (*s)
  {
  case 'N':
  case 'n': ind = 0; break;
  case 'S':
  case 's': ind = 1; break;
  case 'E':
  case 'e': ind = 2; break;
  case 'W':
  case 'w': ind = 3; break;
  default: return false;
  }

  arrPos[ind] = s++;
  return true;
}

// Attempts to read a double from the start of |str|
// in one of what we assume are two most common forms
// for lat/lon: decimal digits separated either
// by a dot or by a comma, with digits on both sides
// of the separator.
// If the attempt fails, falls back to std::strtod.
// |*strEnd| is set to the first character after the number
// (to |str| itself when nothing could be parsed).
double EatDouble(char const * str, char ** strEnd)
{
  bool gotDigitBeforeMark = false;
  bool gotMark = false;
  bool gotDigitAfterMark = false;

  char const * markPos = nullptr;
  char const * p = str;
  double modifier = 1.0;

  while (true)
  {
    if (IsDecimalMark(*p))
    {
      // The second decimal mark ends the number.
      if (gotMark)
        break;
      gotMark = true;
      markPos = p;
    }
    else if (IsAsciiDigit(*p))
    {
      if (gotMark)
        gotDigitAfterMark = true;
      else
        gotDigitBeforeMark = true;
    }
    else if (IsNegativeSymbol(*p))
    {
      // A sign is only valid as the very first character. Anywhere else it
      // ends the number, so "50.5-30.5" yields 50.5 and leaves "-30.5" for
      // the caller instead of producing a meaningless value.
      if (p != str)
        break;
      modifier = -1.0;
    }
    else
    {
      break;
    }
    ++p;
  }

  if (gotDigitBeforeMark && gotMark && gotDigitAfterMark)
  {
    std::string const part1(str, markPos);
    std::string const part2(markPos + 1, p);
    *strEnd = const_cast<char *>(p);
    auto const x1 = std::atof(part1.c_str());
    auto const x2 = std::atof(part2.c_str());
    // |modifier| carries the sign to the fraction; this matters for inputs like "-0.5".
    return x1 + x2 * modifier * std::pow(10.0, -static_cast<double>(part2.size()));
  }

  return std::strtod(str, strEnd);
}
} // namespace
namespace search
{
// Parses |query| as a pair of coordinates written as plain numbers or as
// degrees/minutes/seconds, optionally with N/S/E/W hemisphere letters.
// Returns true when both coordinates were found and are in range:
// |lat| in [-90, 90] and |lon| normalized to [-180, 180].
// Note that |lat| and |lon| may be modified even when false is returned.
bool MatchLatLonDegree(string const & query, double & lat, double & lon)
{
// should be default initialization (0, false)
// Slots 0..2 hold degrees, minutes and seconds of the first coordinate,
// slots 3..5 hold the same for the second one; |second| tells whether the slot is set.
array<pair<double, bool>, 6> v;
// Index of the degrees slot of the coordinate being read: 0 or 3.
int base = 0;
// Positions of N, S, E, W symbols
char const * arrPos[] = {nullptr, nullptr, nullptr, nullptr};
// Tells for each coordinate whether its degrees were followed by an explicit degree mark.
bool arrDegreeSymbol[] = {false, false};
char const * const startQuery = query.c_str();
char const * s = startQuery;
// Each iteration reads one number together with its optional unit mark.
while (true)
{
// |s1| is the token start, |s11| is the position right after a hemisphere letter (if any).
char const * s1 = s;
char const * s11 = s;
if (SkipNSEW(s, arrPos))
{
s11 = s;
Skip(s);
}
else
SkipSpaces(s);
if (!*s)
{
// End of the string - check matching.
break;
}
char * s2;
double const x = EatDouble(s, &s2);
if (s == s2)
{
// invalid token
if (s == s11)
{
// Return error if there are no any delimiters.
return false;
}
else
{
// Check matching if token is delimited.
break;
}
}
else if (x < 0 && s == s1 && !(s == startQuery || kSpaces.find(*(s - 1)) != string::npos))
{
// Skip input like "3-8"
return false;
}
s = s2;
SkipSpaces(s);
// 0 - degrees, 1 - minutes, 2 - seconds, -1 - no explicit mark.
int i = GetDMSIndex(s);
bool degreeSymbol = true;
if (i == -1)
{
// try to assign next possible value mark
if (arrDegreeSymbol[base / 3])
{
// Degrees were marked explicitly, so an unmarked number is the next
// missing part: minutes first, then seconds.
if (!v[base + 1].second)
i = 1;
else
i = 2;
}
else
{
i = 0;
degreeSymbol = false;
}
}
if (i == 0) // degrees
{
if (v[base].second)
{
// Degrees are already set, so this number starts the second coordinate.
if (base == 0)
{
base += 3;
}
else
{
// too many degree values
return false;
}
}
arrDegreeSymbol[base / 3] = degreeSymbol;
}
else // minutes or seconds
if (x < 0.0 || x > 60.0 || // minutes or seconds should be in [0, 60] range
v[base + i].second || // value already exists
!v[base].second || // no degrees found for value
(i == 2 && !v[base + 1].second)) // no minutes for seconds
{
return false;
}
v[base + i].first = x;
v[base + i].second = true;
}
if (!v[0].second || !v[3].second)
{
// degree should exist for both coordinates
return false;
}
if ((arrPos[0] && arrPos[1]) || (arrPos[2] && arrPos[3]))
{
// control symbols should match only once
return false;
}
// Calculate Lat, Lon with correct sign.
lat = fabs(v[0].first) + v[1].first / 60.0 + v[2].first / 3600.0;
if (v[0].first < 0.0)
lat = -lat;
lon = fabs(v[3].first) + v[4].first / 60.0 + v[5].first / 3600.0;
if (v[3].first < 0.0)
lon = -lon;
// Swaps the values when the N/S letter is located after the E/W letter.
// NOTE(review): when only an N/S letter is present this compares against nullptr,
// which presumably evaluates to true and swaps as well - confirm this is intended.
if (max(arrPos[0], arrPos[1]) > max(arrPos[2], arrPos[3]))
swap(lat, lon);
// S and W letters negate the corresponding coordinate.
if (arrPos[1] != nullptr)
lat = -lat;
if (arrPos[3] != nullptr)
lon = -lon;
// Valid input ranges for longitude are: [0, 360] or [-180, 180].
// We normalize it to [-180, 180].
if (lon < -180.0 || lon > 360.0)
return false;
if (lon > 180.0)
lon -= 360.0;
return fabs(lat) <= 90.0;
}
} // namespace search

View file

@ -0,0 +1,9 @@
#pragma once
#include <string>
namespace search
{
// Parses input query for most input coordinates cases.
// Returns true when |query| contains two coordinates in valid ranges; in that
// case |lat| is in [-90, 90] and |lon| is normalized to [-180, 180].
// |lat| and |lon| may be modified even when false is returned.
bool MatchLatLonDegree(std::string const & query, double & lat, double & lon);
} // namespace search

View file

@ -0,0 +1,56 @@
#include "search/lazy_centers_table.hpp"
#include "indexer/mwm_set.hpp"
#include "platform/mwm_traits.hpp"
#include "defines.hpp"
namespace search
{
LazyCentersTable::LazyCentersTable(MwmValue const & value)
: m_value(value)
, m_state(STATE_NOT_LOADED)
, m_reader(std::unique_ptr<ModelReader>())
{}
void LazyCentersTable::EnsureTableLoaded()
{
if (m_state != STATE_NOT_LOADED)
return;
try
{
m_reader = m_value.m_cont.GetReader(CENTERS_FILE_TAG);
}
catch (RootException const & ex)
{
LOG(LERROR, ("Unable to load", CENTERS_FILE_TAG, ex.Msg()));
m_state = STATE_FAILED;
return;
}
version::MwmTraits traits(m_value.GetMwmVersion());
auto const format = traits.GetCentersTableFormat();
if (format == version::MwmTraits::CentersTableFormat::PlainEliasFanoMap)
m_table = CentersTable::LoadV0(*m_reader.GetPtr(), m_value.GetHeader().GetDefGeometryCodingParams());
else if (format == version::MwmTraits::CentersTableFormat::EliasFanoMapWithHeader)
m_table = CentersTable::LoadV1(*m_reader.GetPtr());
else
CHECK(false, ("Unknown centers table format."));
if (m_table)
m_state = STATE_LOADED;
else
m_state = STATE_FAILED;
}
// Loads the table if needed and looks up the center of feature |id|.
// Returns false when the table is unavailable or the lookup fails.
bool LazyCentersTable::Get(uint32_t id, m2::PointD & center)
{
  EnsureTableLoaded();
  return m_state == STATE_LOADED && m_table->Get(id, center);
}
} // namespace search

View file

@ -0,0 +1,41 @@
#pragma once
#include "indexer/centers_table.hpp"
#include "coding/files_container.hpp"
#include "geometry/point2d.hpp"
#include <cstdint>
#include <memory>
class MwmValue;
namespace search
{
// Wrapper around CentersTable that reads the table from the mwm only when it
// is first requested.
class LazyCentersTable
{
public:
enum State
{
// Loading has not been attempted yet.
STATE_NOT_LOADED,
// The table is loaded and can be queried.
STATE_LOADED,
// Loading was attempted and failed; it is not retried.
STATE_FAILED
};
// Keeps a reference to |value|, so |value| must outlive this object.
explicit LazyCentersTable(MwmValue const & value);
inline State GetState() const { return m_state; }
// Loads the table unless loading has already been attempted.
void EnsureTableLoaded();
// Writes the center of feature |id| to |center|. Returns false when the
// table could not be loaded or the lookup fails.
[[nodiscard]] bool Get(uint32_t id, m2::PointD & center);
private:
MwmValue const & m_value;
State m_state;
// Reader of the centers section; assigned in EnsureTableLoaded().
FilesContainerR::TReader m_reader;
std::unique_ptr<CentersTable> m_table;
};
} // namespace search

View file

@ -0,0 +1,17 @@
#include "search/localities_source.hpp"
#include "indexer/classificator.hpp"
namespace search
{
// Collects every classificator type in the place-city and place-town subtrees.
LocalitiesSource::LocalitiesSource()
{
  auto & classificator = classif();

  auto const cityRoot = classificator.GetTypeByPath({"place", "city"});
  classificator.ForEachInSubtree([this](uint32_t type) { m_cities.push_back(type); }, cityRoot);

  auto const townRoot = classificator.GetTypeByPath({"place", "town"});
  classificator.ForEachInSubtree([this](uint32_t type) { m_towns.push_back(type); }, townRoot);
}
} // namespace search

View file

@ -0,0 +1,24 @@
#pragma once
#include <cstdint>
#include <vector>
namespace search
{
// Holds the classificator types of cities and towns.
struct LocalitiesSource
{
  // Fills both lists from the classificator.
  LocalitiesSource();

  // Calls |fn| for every city type and then for every town type.
  template <typename Fn>
  void ForEachType(Fn && fn) const
  {
    for (uint32_t const cityType : m_cities)
      fn(cityType);
    for (uint32_t const townType : m_towns)
      fn(townType);
  }

  std::vector<uint32_t> m_cities;
  std::vector<uint32_t> m_towns;
};
} // namespace search

View file

@ -0,0 +1,313 @@
#include "search/locality_finder.hpp"
#include "search/categories_cache.hpp"
#include "search/cbv.hpp"
#include "search/dummy_rank_table.hpp"
#include "search/mwm_context.hpp"
#include "indexer/data_source.hpp"
#include "indexer/feature_visibility.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "base/assert.hpp"
#include "base/stl_helpers.hpp"
#include <vector>
namespace search
{
using namespace std;
namespace
{
// Radii passed to the holders of cities and villages in LocalityFinder; they
// define the size of the rects used to look up and to load localities around a point.
double constexpr kMaxCityRadiusMeters = 30000.0;
double constexpr kMaxVillageRadiusMeters = 2000.0;
struct Filter
{
public:
virtual ~Filter() = default;
virtual bool IsGood(uint32_t id) const = 0;
};
class CityFilter : public Filter
{
public:
explicit CityFilter(RankTable const & ranks) : m_ranks(ranks) {}
// Filter overrides:
bool IsGood(uint32_t id) const override { return m_ranks.Get(id) != 0; }
private:
RankTable const & m_ranks;
};
class VillageFilter : public Filter
{
public:
VillageFilter(MwmContext const & ctx, VillagesCache & villages) : m_cbv(villages.Get(ctx)) {}
// Filter overrides:
bool IsGood(uint32_t id) const override { return m_cbv.HasBit(id); }
private:
CBV m_cbv;
};
// Functor called for every feature index found in a rect. Converts suitable
// features into LocalityItems and adds them to the holder, once per feature.
class LocalitiesLoader
{
public:
  LocalitiesLoader(MwmContext const & ctx, CitiesBoundariesTable const & boundaries, Filter const & filter,
                   LocalityFinder::Holder & holder, map<MwmSet::MwmId, unordered_set<uint32_t>> & loadedIds)
    : m_ctx(ctx)
    , m_boundaries(boundaries)
    , m_filter(filter)
    , m_holder(holder)
    , m_loadedIds(loadedIds[m_ctx.GetId()])
  {}

  void operator()(uint32_t id) const
  {
    // Rejected by the filter or already loaded for this mwm.
    if (!m_filter.IsGood(id) || m_loadedIds.count(id) != 0)
      return;

    auto ft = m_ctx.GetFeature(id);
    if (!ft || ft->GetGeomType() != feature::GeomType::Point)
      return;

    // Only cities, towns and villages are of interest.
    using namespace ftypes;
    auto const localityType = IsLocalityChecker::Instance().GetType(*ft);
    bool const isSettlement = localityType == LocalityType::City || localityType == LocalityType::Town ||
                              localityType == LocalityType::Village;
    if (!isSettlement)
      return;

    auto const population = ftypes::GetPopulation(*ft);
    if (population == 0)
      return;

    auto const & names = ft->GetNames();
    auto const center = ft->GetCenter();
    auto const fid = ft->GetID();

    CitiesBoundariesTable::Boundaries boundaries;
    m_boundaries.Get(fid, boundaries);

    m_holder.Add(LocalityItem(names, center, std::move(boundaries), population, fid));
    m_loadedIds.insert(id);
  }

private:
  MwmContext const & m_ctx;
  CitiesBoundariesTable const & m_boundaries;
  Filter const & m_filter;
  LocalityFinder::Holder & m_holder;
  // Ids already loaded from the mwm of |m_ctx|.
  unordered_set<uint32_t> & m_loadedIds;
};
int GetVillagesScale()
{
auto currentVillagesMinDrawableScale = 0;
ftypes::IsVillageChecker::Instance().ForEachType([&currentVillagesMinDrawableScale](uint32_t type)
{
feature::TypesHolder th;
th.Assign(type);
currentVillagesMinDrawableScale = max(currentVillagesMinDrawableScale, GetMinDrawableScaleClassifOnly(th));
});
// Needed for backward compatibility. |kCompatibilityVillagesMinDrawableScale| should be set to
// maximal value we have in mwms over all data versions.
int constexpr kCompatibilityVillagesMinDrawableScale = 13;
ASSERT_LESS_OR_EQUAL(currentVillagesMinDrawableScale, kCompatibilityVillagesMinDrawableScale,
("Set kCompatibilityVillagesMinDrawableScale to", currentVillagesMinDrawableScale));
return max(currentVillagesMinDrawableScale, kCompatibilityVillagesMinDrawableScale);
}
} // namespace
// LocalityItem ------------------------------------------------------------------------------------
// Copies the names, center, population and id; takes ownership of |boundaries|.
LocalityItem::LocalityItem(StringUtf8Multilang const & names, m2::PointD const & center, Boundaries && boundaries,
                           uint64_t population, FeatureID const & id)
  : m_names(names)
  , m_center(center)
  , m_boundaries(std::move(boundaries))
  , m_population(population)
  , m_id(id)
{}

// Prints names, center, population and boundaries of |item|.
string DebugPrint(LocalityItem const & item)
{
  stringstream out;
  out << "Names = " << DebugPrint(item.m_names) << ", "
      << "Center = " << DebugPrint(item.m_center) << ", "
      << "Population = " << item.m_population << ", "
      << "Boundaries = " << DebugPrint(item.m_boundaries);
  return out.str();
}
// LocalitySelector --------------------------------------------------------------------------------
// Creates a selector that looks for the best locality for the point |p|.
LocalitySelector::LocalitySelector(m2::PointD const & p) : m_p(p) {}

// Considers |item| as a candidate. A locality whose boundaries contain the
// point always beats one that does not; otherwise the lower score wins.
void LocalitySelector::operator()(LocalityItem const & item)
{
  auto const inside = item.m_boundaries.HasPoint(m_p);

  // TODO (@y, @m): replace this naive score by p-values on
  // multivariate Gaussian.
  double const distance = mercator::DistanceOnEarth(item.m_center, m_p);

  // GetPopulationByRadius may return 0.
  double const score = (ftypes::GetPopulationByRadius(distance) + 1) / static_cast<double>(item.m_population);

  // The current best contains the point and the candidate does not.
  if (!inside && m_inside)
    return;

  ASSERT(inside || !m_inside, ());

  bool const firstInside = inside && !m_inside;
  if (!firstInside && !(score < m_score))
    return;

  m_inside = inside;
  m_score = score;
  m_locality = &item;
}
// LocalityFinder::Holder --------------------------------------------------------------------------
// |radiusMeters| defines the size of the rects returned by GetRect() and GetDRect().
LocalityFinder::Holder::Holder(double radiusMeters) : m_radiusMeters(radiusMeters) {}

// Returns true when at least one covered mark is found in |rect|.
bool LocalityFinder::Holder::IsCovered(m2::RectD const & rect) const
{
  bool hasMark = false;
  m_coverage.ForEachInRect(rect, [&hasMark](bool) { hasMark = true; });
  return hasMark;
}

// Marks the point |p| as covered.
void LocalityFinder::Holder::SetCovered(m2::PointD const & p)
{
  m2::RectD const pointRect(p, p);
  m_coverage.Add(true, pointRect);
}

// Stores |item| in the tree, keyed by its center point.
void LocalityFinder::Holder::Add(LocalityItem const & item)
{
  m2::RectD const centerRect(item.m_center, item.m_center);
  m_localities.Add(item, centerRect);
}

// Feeds every stored locality found in |rect| to |selector|.
void LocalityFinder::Holder::ForEachInVicinity(m2::RectD const & rect, LocalitySelector & selector) const
{
  m_localities.ForEachInRect(rect, selector);
}

// Rect around |p| built from the holder's radius.
m2::RectD LocalityFinder::Holder::GetRect(m2::PointD const & p) const
{
  return mercator::RectByCenterXYAndSizeInMeters(p, m_radiusMeters);
}

// Same as GetRect(), but built from the doubled radius.
m2::RectD LocalityFinder::Holder::GetDRect(m2::PointD const & p) const
{
  double const doubledRadiusMeters = 2 * m_radiusMeters;
  return mercator::RectByCenterXYAndSizeInMeters(p, doubledRadiusMeters);
}

// Drops both the stored localities and the coverage marks.
void LocalityFinder::Holder::Clear()
{
  m_localities.Clear();
  m_coverage.Clear();
}
// LocalityFinder ----------------------------------------------------------------------------------
// Stores references to the collaborators; nothing is loaded at construction.
LocalityFinder::LocalityFinder(DataSource const & dataSource, CitiesBoundariesTable const & boundariesTable,
                               VillagesCache & villagesCache)
  : m_dataSource(dataSource)
  , m_boundariesTable(boundariesTable)
  , m_villagesCache(villagesCache)
  , m_cities(kMaxCityRadiusMeters)
  , m_villages(kMaxVillageRadiusMeters)
  , m_mapsLoaded(false)
{}

// Forgets everything that was loaded: localities, the rank table and the list of maps.
void LocalityFinder::ClearCache()
{
  // Loaded localities.
  m_cities.Clear();
  m_villages.Clear();
  m_loadedIds.clear();
  m_ranks.reset();

  // Known maps; they are collected again by the next UpdateMaps() call.
  m_maps.Clear();
  m_worldId.Reset();
  m_mapsLoaded = false;
}
// Loads cities (from the world map) and/or villages (from the country maps
// found at |p|) around |p| and marks |p| as covered in the corresponding holder.
void LocalityFinder::LoadVicinity(m2::PointD const & p, bool loadCities, bool loadVillages)
{
  UpdateMaps();

  if (loadCities)
  {
    m2::RectD const citiesRect = m_cities.GetDRect(p);
    auto worldHandle = m_dataSource.GetMwmHandleById(m_worldId);
    if (worldHandle.IsAlive())
    {
      auto const & value = *worldHandle.GetValue();

      // The rank table is loaded once; a dummy table is used when the map has none.
      if (!m_ranks)
        m_ranks = RankTable::Load(value.m_cont, SEARCH_RANKS_FILE_TAG);
      if (!m_ranks)
        m_ranks = make_unique<DummyRankTable>();

      MwmContext ctx(std::move(worldHandle));
      CityFilter const cityFilter(*m_ranks);
      ctx.ForEachIndex(citiesRect, LocalitiesLoader(ctx, m_boundariesTable, cityFilter, m_cities, m_loadedIds));
    }
    m_cities.SetCovered(p);
  }

  if (loadVillages)
  {
    m2::RectD const villagesRect = m_villages.GetDRect(p);
    auto const loadFromMwm = [&](MwmSet::MwmId const & id)
    {
      auto handle = m_dataSource.GetMwmHandleById(id);
      if (!handle.IsAlive())
        return;

      static int const scale = GetVillagesScale();

      MwmContext ctx(std::move(handle));
      VillageFilter const villageFilter(ctx, m_villagesCache);
      ctx.ForEachIndex(villagesRect, scale,
                       LocalitiesLoader(ctx, m_boundariesTable, villageFilter, m_villages, m_loadedIds));
    };
    m_maps.ForEachInRect(m2::RectD(p, p), loadFromMwm);
    m_villages.SetCovered(p);
  }
}
// Collects the registered maps once: remembers the id of the world map and
// adds every country map to the tree, keyed by its borders rect.
void LocalityFinder::UpdateMaps()
{
  if (m_mapsLoaded)
    return;

  vector<shared_ptr<MwmInfo>> allMwms;
  m_dataSource.GetMwmsInfo(allMwms);

  for (auto const & mwmInfo : allMwms)
  {
    MwmSet::MwmId id(mwmInfo);
    auto const mwmType = mwmInfo->GetType();

    if (mwmType == MwmInfo::WORLD)
    {
      m_worldId = id;
    }
    else if (mwmType == MwmInfo::COUNTRY)
    {
      /// @todo Use fair MWM rect from CountryInfoGetter here and everywhere in search?
      /// @see MwmInfo.m_bordersRect for details.
      m_maps.Add(id, mwmInfo->m_bordersRect);
    }
    // MwmInfo::COASTS maps are ignored.
  }

  m_mapsLoaded = true;
}
} // namespace search

View file

@ -0,0 +1,160 @@
#pragma once
#include "search/cities_boundaries_table.hpp"
#include "indexer/feature_utils.hpp"
#include "indexer/mwm_set.hpp"
#include "indexer/rank_table.hpp"
#include "platform/preferred_languages.hpp"
#include "coding/string_utf8_multilang.hpp"
#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
#include "geometry/tree4d.hpp"
#include "base/macros.hpp"
#include <cstdint>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
class DataSource;
namespace search
{
class VillagesCache;
// Description of a single locality: names, center, boundaries, population and feature id.
struct LocalityItem
{
using Boundaries = CitiesBoundariesTable::Boundaries;
LocalityItem(StringUtf8Multilang const & names, m2::PointD const & center, Boundaries && boundaries,
uint64_t population, FeatureID const & id);
// Writes the name in language |lang| to |name|; returns the result of the lookup in |m_names|.
bool GetName(int8_t lang, std::string_view & name) const { return m_names.GetString(lang, name); }
// Same as GetName(), but falls back to the default name when there is no name in |lang|.
bool GetSpecifiedOrDefaultName(int8_t lang, std::string_view & name) const
{
return GetName(lang, name) || GetName(StringUtf8Multilang::kDefaultCode, name);
}
// Writes the primary readable name for the current map language to |name|.
// Returns false when the mwm info is unavailable or the resulting name is empty.
bool GetReadableName(std::string_view & name) const
{
auto const mwmInfo = m_id.m_mwmId.GetInfo();
if (!mwmInfo)
return false;
feature::NameParamsOut out;
feature::GetReadableName(
{m_names, mwmInfo->GetRegionData(), languages::GetCurrentMapLanguage(), false /* allowTranslit */}, out);
// NOTE(review): |name| is a view taken from the local |out|; presumably |out.primary|
// refers to data owned by |m_names| - confirm that it cannot dangle.
name = out.primary;
return !name.empty();
}
StringUtf8Multilang m_names;
m2::PointD m_center;
Boundaries m_boundaries;
uint64_t m_population;
FeatureID m_id;
};
std::string DebugPrint(LocalityItem const & item);
/// Functor that is fed LocalityItem candidates one by one and remembers a pointer
/// to the preferred one. The selection rule lives in operator(), which is defined
/// outside of this header.
class LocalitySelector
{
public:
  LocalitySelector(m2::PointD const & p);

  /// Offers |item| as a candidate.
  /// NOTE(review): only a pointer to a LocalityItem is kept (m_locality), so the
  /// selected item presumably has to outlive the selector - confirm against the definition.
  void operator()(LocalityItem const & item);

  /// Invokes |fn| with the selected locality, if there is one.
  /// @return true when a locality was selected and |fn| was called, false otherwise.
  template <typename Fn>
  bool WithBestLocality(Fn && fn) const
  {
    bool const found = m_locality != nullptr;
    if (found)
      fn(*m_locality);
    return found;
  }

private:
  m2::PointD const m_p;
  bool m_inside = false;
  // Starts at the largest representable double.
  double m_score = std::numeric_limits<double>::max();
  // Null until some candidate has been selected.
  LocalityItem const * m_locality = nullptr;
};
class LocalityFinder
{
public:
class Holder
{
public:
Holder(double radiusMeters);
bool IsCovered(m2::RectD const & rect) const;
void SetCovered(m2::PointD const & p);
void Add(LocalityItem const & item);
void ForEachInVicinity(m2::RectD const & rect, LocalitySelector & selector) const;
m2::RectD GetRect(m2::PointD const & p) const;
m2::RectD GetDRect(m2::PointD const & p) const;
void Clear();
private:
double const m_radiusMeters;
m4::Tree<bool> m_coverage;
m4::Tree<LocalityItem> m_localities;
DISALLOW_COPY_AND_MOVE(Holder);
};
LocalityFinder(DataSource const & dataSource, CitiesBoundariesTable const & boundaries,
VillagesCache & villagesCache);
template <typename Fn>
bool GetLocality(m2::PointD const & p, Fn && fn)
{
m2::RectD const crect = m_cities.GetRect(p);
m2::RectD const vrect = m_villages.GetRect(p);
LoadVicinity(p, !m_cities.IsCovered(crect) /* loadCities */, !m_villages.IsCovered(vrect) /* loadVillages */);
LocalitySelector selector(p);
m_cities.ForEachInVicinity(crect, selector);
m_villages.ForEachInVicinity(vrect, selector);
return selector.WithBestLocality(std::forward<Fn>(fn));
}
void ClearCache();
private:
void LoadVicinity(m2::PointD const & p, bool loadCities, bool loadVillages);
void UpdateMaps();
DataSource const & m_dataSource;
CitiesBoundariesTable const & m_boundariesTable;
VillagesCache & m_villagesCache;
Holder m_cities;
Holder m_villages;
m4::Tree<MwmSet::MwmId> m_maps;
MwmSet::MwmId m_worldId;
bool m_mapsLoaded;
std::unique_ptr<RankTable> m_ranks;
std::map<MwmSet::MwmId, std::unordered_set<uint32_t>> m_loadedIds;
};
} // namespace search

Some files were not shown because too many files have changed in this diff Show more