Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 13:58:55 +01:00
parent 4af19165ec
commit 68073add76
12458 changed files with 12350765 additions and 2 deletions

View file

@ -0,0 +1,68 @@
"""
This file contains api for osmfilter and generator_tool to generate coastline.
"""
import os
import subprocess
from maps_generator.generator import settings
from maps_generator.generator.env import Env
from maps_generator.generator.gen_tool import run_gen_tool
from maps_generator.generator.osmtools import osmfilter
def filter_coastline(
    name_executable,
    in_file,
    out_file,
    output=subprocess.DEVNULL,
    error=subprocess.DEVNULL,
):
    """
    Run osmfilter on in_file and write the coastline-related subset to out_file.

    Ways are kept when tagged natural=coastline, nodes are kept according to
    the "capital=yes place=town =city" expression, and the generic keep
    filter is passed as an empty string.

    :param name_executable: osmfilter executable to launch.
    :param in_file: input file handed to osmfilter.
    :param out_file: file osmfilter writes its result to.
    :param output: stdout target for the subprocess.
    :param error: stderr target for the subprocess.
    """
    # The insertion order is preserved on purpose: the keyword arguments are
    # handed to osmfilter in exactly this order.
    filters = {
        "keep": "",
        "keep_ways": "natural=coastline",
        "keep_nodes": "capital=yes place=town =city",
    }
    osmfilter(
        name_executable, in_file, out_file, output=output, error=error, **filters
    )
def make_coastline(env: Env):
    """
    Produce the coastline data for a generation.

    The planet o5m file is first filtered into "coastline.o5m" inside
    env.paths.coastline_path. The generator tool is then run twice on that
    file: once with preprocess=True and once with make_coasts=True and
    fail_on_coasts=True.

    :param env: generation environment providing tools, paths and outputs.
    """
    coastline_o5m = os.path.join(env.paths.coastline_path, "coastline.o5m")
    filter_coastline(
        env[settings.OSM_TOOL_FILTER],
        env.paths.planet_o5m,
        coastline_o5m,
        output=env.get_subprocess_out(),
        error=env.get_subprocess_out(),
    )

    def shared_options():
        # Built anew for every run so that each value is read from env at the
        # moment the generator tool is launched.
        return {
            "out": env.get_subprocess_out(),
            "err": env.get_subprocess_out(),
            "data_path": env.paths.data_path,
            "intermediate_data_path": env.paths.coastline_path,
            "osm_file_type": "o5m",
            "osm_file_name": coastline_o5m,
            "node_storage": env.node_storage,
            "user_resource_path": env.paths.user_resource_path,
        }

    # First pass: preprocessing of the filtered file.
    run_gen_tool(env.gen_tool, **shared_options(), preprocess=True)

    # Second pass: the coastline build itself.
    run_gen_tool(
        env.gen_tool,
        **shared_options(),
        make_coasts=True,
        fail_on_coasts=True,
        threads_count=settings.THREADS_COUNT,
    )

View file

@ -0,0 +1,100 @@
from pathlib import Path
import subprocess
import warnings
class Status:
    """
    Message templates describing the outcome of a single diff calculation.

    Each template is filled with str.format() using the parameters of the
    calculation (new, old, out, diff_size, new_size).
    """

    NO_NEW_VERSION = "Failed: new version doesn't exist: {new}"
    INTERNAL_ERROR = "Failed: internal error (C++ module) while calculating"
    NO_OLD_VERSION = "Skipped: old version doesn't exist: {old}"
    NOTHING_TO_DO = "Skipped: output already exists: {out}"
    OK = "Succeeded: calculated {out}: {diff_size} out of {new_size} bytes"
    TOO_LARGE = "Cancelled: {out}: diff {diff_size} > new version {new_size}"

    @classmethod
    def is_error(cls, status):
        """Return True when status is one of the two "Failed" outcomes."""
        return status in (cls.NO_NEW_VERSION, cls.INTERNAL_ERROR)
def calculate_diff(params):
    """
    Calculate one mwm diff as described by params.

    params is a dict with the keys "tool", "new", "old" and "out"; the values
    are used as path objects (exists(), stat(), as_posix()). When the output
    file does not exist yet, the diff tool is run as
    `<tool> make <old> <new> <out>`.

    :return: a (status, params) tuple where status is one of the Status
        templates. Unless the function returns early, params is extended in
        place with "diff_size" and "new_size".
    """
    tool = params["tool"]
    new_mwm = params["new"]
    old_mwm = params["old"]
    diff_path = params["out"]

    if not new_mwm.exists():
        return Status.NO_NEW_VERSION, params
    if not old_mwm.exists():
        return Status.NO_OLD_VERSION, params

    if diff_path.exists():
        outcome = Status.NOTHING_TO_DO
    else:
        completed = subprocess.run(
            [tool.as_posix(), "make", old_mwm, new_mwm, diff_path]
        )
        if completed.returncode != 0:
            return Status.INTERNAL_ERROR, params
        outcome = Status.OK

    diff_size = diff_path.stat().st_size
    new_size = new_mwm.stat().st_size
    # A diff bigger than the new file overrides whatever outcome was chosen.
    if diff_size > new_size:
        outcome = Status.TOO_LARGE

    params["diff_size"] = diff_size
    params["new_size"] = new_size
    return outcome, params
def mwm_diff_calculation(data_dir, logger, depth):
    """
    Calculate diffs for the first `depth` entries yielded by data_dir.

    Every result is reported through logger.info(); the first result whose
    status is an error aborts the run.

    :param data_dir: object whose get_mwms() yields calculation parameters.
    :param logger: logger used to report non-error results.
    :param depth: maximum number of entries to process.
    :raises Exception: with the formatted status when a calculation fails.
    """
    # get_mwms() is consumed completely before the slice is taken.
    candidates = list(data_dir.get_mwms())[:depth]
    for job in candidates:
        status, params = calculate_diff(job)
        message = status.format(**params)
        if Status.is_error(status):
            raise Exception(message)
        logger.info(message)
class DataDir(object):
    """Describes where one mwm, its old versions and its diffs live on disk."""

    def __init__(self, diff_tool, mwm_name, new_version_dir, old_version_root_dir):
        """
        :param diff_tool: path to the diff tool executable.
        :param mwm_name: file name of the mwm.
        :param new_version_dir: directory holding the new version of the mwm.
        :param old_version_root_dir: directory whose digit-prefixed
            subdirectories hold old versions.
        """
        self.diff_tool_path = Path(diff_tool)
        self.mwm_name = mwm_name
        self.diff_name = f"{self.mwm_name}.mwmdiff"
        self.new_version_dir = Path(new_version_dir)
        self.new_version_path = Path(new_version_dir, mwm_name)
        self.old_version_root_dir = Path(old_version_root_dir)

    def get_mwms(self):
        """
        Yield diff parameters for every old version directory.

        Candidates are the entries of old_version_root_dir whose name starts
        with a digit, visited in descending order. The new version directory
        itself and non-directories are skipped. For every remaining candidate
        a directory with the same name is created inside new_version_dir
        before the parameters are yielded.
        """
        candidates = sorted(self.old_version_root_dir.glob("[0-9]*"), reverse=True)
        for old_dir in candidates:
            if old_dir == self.new_version_dir or not old_dir.is_dir():
                continue

            diff_dir = Path(self.new_version_dir, old_dir.name)
            diff_dir.mkdir(exist_ok=True)
            yield {
                "tool": self.diff_tool_path,
                "new": self.new_version_path,
                "old": Path(old_dir, self.mwm_name),
                "out": Path(diff_dir, self.diff_name),
            }
if __name__ == "__main__":
    import logging
    import sys

    # DataDir needs the diff tool path in addition to the three original
    # positional arguments; it is taken as a fourth argument so the meaning
    # of the existing positions does not change.
    if len(sys.argv) != 5:
        sys.exit(
            f"Usage: {sys.argv[0]} MWM_NAME NEW_VERSION_DIR "
            "OLD_VERSION_ROOT_DIR DIFF_TOOL"
        )

    # Send everything, including debug messages, to stdout.
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler(stream=sys.stdout))
    logger.setLevel(logging.DEBUG)

    data_dir = DataDir(
        diff_tool=sys.argv[4],
        mwm_name=sys.argv[1],
        new_version_dir=sys.argv[2],
        old_version_root_dir=sys.argv[3],
    )
    # Only the most recent old version is diffed.
    mwm_diff_calculation(data_dir, logger, depth=1)

View file

@ -0,0 +1,582 @@
import collections
import datetime
import logging
import logging.config
import os
import shutil
import sys
from functools import wraps
from typing import Any
from typing import AnyStr
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Set
from typing import Type
from typing import Union
from maps_generator.generator import settings
from maps_generator.generator import status
from maps_generator.generator.osmtools import build_osmtools
from maps_generator.generator.stages import Stage
from maps_generator.utils.file import find_executable
from maps_generator.utils.file import is_executable
from maps_generator.utils.file import make_symlink
# Logger named "maps_generator", used for all messages of this module.
logger = logging.getLogger("maps_generator")
# Names of the two world maps; they are added to the country list in addition
# to the names taken from the borders directory.
WORLD_NAME = "World"
WORLD_COASTS_NAME = "WorldCoasts"
# Both world map names as a set, for membership checks.
WORLDS_NAMES = {WORLD_NAME, WORLD_COASTS_NAME}
def get_all_countries_list(borders_path: AnyStr) -> List[AnyStr]:
    """
    Returns all countries including World and WorldCoasts.

    Country names are the names of the regular files found in borders_path
    with every ".poly" occurrence removed; the world names are appended after
    them.
    """
    countries = []
    for entry in os.listdir(borders_path):
        if os.path.isfile(os.path.join(borders_path, entry)):
            countries.append(entry.replace(".poly", ""))
    countries.extend(WORLDS_NAMES)
    return countries
def create_if_not_exist_path(path: AnyStr) -> bool:
    """
    Creates directory if it doesn't exist.

    :return: True if the directory was created by this call, False if the
        path already existed.
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        return False

    logger.info(f"Create {path} ...")
    return True
def create_if_not_exist(func: Callable[..., AnyStr]) -> Callable[..., AnyStr]:
    """
    Decorator for functions that return a path.

    The decorated function is called first; the path it returns is then
    passed to create_if_not_exist_path and finally handed back to the caller.
    """

    @wraps(func)
    def ensure_directory(*args, **kwargs):
        result = func(*args, **kwargs)
        create_if_not_exist_path(result)
        return result

    return ensure_directory
class Version:
    """It's used for writing and reading a generation version."""

    @staticmethod
    def write(out_path: AnyStr, version: AnyStr):
        """Store str(version) in the version file inside out_path."""
        version_file = os.path.join(out_path, settings.VERSION_FILE_NAME)
        with open(version_file, "w") as f:
            f.write(str(version))

    @staticmethod
    def read(version_path: AnyStr) -> int:
        """
        Read the version from the first line of version_path.

        :return: the version as int, or 0 (after logging the error) when the
            first line is not an integer.
        """
        with open(version_path) as f:
            first_line = f.readline().strip()

        try:
            return int(first_line)
        except ValueError:
            logger.exception(f"Cast '{first_line}' to int error.")
            return 0
def find_last_build_dir(hint: Optional[AnyStr] = None) -> Optional[AnyStr]:
    """
    Find the name of the last generation directory in settings.MAIN_OUT_PATH.

    With a hint, the hint itself is returned when that directory exists and
    None otherwise. Without a hint, the entry with the greatest version is
    chosen (entries without a version file count as version 0); None is
    returned when the output path is missing, empty, or the greatest version
    is 0.
    """
    if hint is not None:
        hinted = os.path.join(settings.MAIN_OUT_PATH, hint)
        if os.path.exists(hinted):
            return hint
        return None

    try:
        candidates = [
            os.path.join(settings.MAIN_OUT_PATH, name)
            for name in os.listdir(settings.MAIN_OUT_PATH)
        ]
    except FileNotFoundError:
        logger.exception(f"{settings.MAIN_OUT_PATH} not found.")
        return None

    def version_of(build_dir):
        version_file = os.path.join(build_dir, settings.VERSION_FILE_NAME)
        if os.path.isfile(version_file):
            return Version.read(version_file)
        return 0

    # Keep the first entry holding the greatest version; later entries with
    # an equal version never replace it.
    best_path, best_version = None, 0
    for candidate in candidates:
        candidate_version = version_of(candidate)
        if best_path is None or candidate_version > best_version:
            best_path, best_version = candidate, candidate_version

    if best_path is None or best_version == 0:
        return None
    return best_path.split(os.sep)[-1]
class PathProvider:
    """
    PathProvider is used for building paths for a maps generation.

    Properties decorated with create_if_not_exist create the directory they
    return on every access; the remaining properties only compute a path.
    """

    def __init__(self, build_path: AnyStr, build_name: AnyStr, mwm_version: AnyStr):
        self.build_path = build_path
        self.build_name = build_name
        self.mwm_version = mwm_version

        create_if_not_exist_path(self.build_path)

    # Small join helpers; each one reads its base path at call time.

    def _in_build(self, *parts: AnyStr) -> AnyStr:
        """Join parts onto build_path."""
        return os.path.join(self.build_path, *parts)

    def _in_intermediate(self, *parts: AnyStr) -> AnyStr:
        """Join parts onto intermediate_data_path."""
        return os.path.join(self.intermediate_data_path, *parts)

    def _in_mwm(self, *parts: AnyStr) -> AnyStr:
        """Join parts onto mwm_path."""
        return os.path.join(self.mwm_path, *parts)

    def _in_resources(self, *parts: AnyStr) -> AnyStr:
        """Join parts onto user_resource_path."""
        return os.path.join(self.user_resource_path, *parts)

    # Directories that are created on access.

    @property
    @create_if_not_exist
    def intermediate_data_path(self) -> AnyStr:
        """
        Directory for intermediate files, for example downloaded external
        files that are needed for generation, *.mwm.tmp files, etc.
        """
        return self._in_build("intermediate_data")

    @property
    @create_if_not_exist
    def cache_path(self) -> AnyStr:
        """
        Directory with caches for nodes, ways, relations. Falls back to
        intermediate_data_path when settings.CACHE_PATH is not set.
        """
        if settings.CACHE_PATH:
            return os.path.join(settings.CACHE_PATH, self.build_name)
        return self.intermediate_data_path

    @property
    @create_if_not_exist
    def data_path(self) -> AnyStr:
        """A synonym for intermediate_data_path."""
        return self.intermediate_data_path

    @property
    @create_if_not_exist
    def intermediate_tmp_path(self) -> AnyStr:
        """Directory with *.mwm.tmp files."""
        return self._in_intermediate("tmp")

    @property
    @create_if_not_exist
    def mwm_path(self) -> AnyStr:
        """Directory with *.mwm files, named after the mwm version."""
        return self._in_build(self.mwm_version)

    @property
    @create_if_not_exist
    def log_path(self) -> AnyStr:
        """The "logs" directory of the build."""
        return self._in_build("logs")

    @property
    @create_if_not_exist
    def generation_borders_path(self) -> AnyStr:
        """
        Directory with *.poly files that define which .mwm files are
        generated.
        """
        return self._in_intermediate("borders")

    @property
    @create_if_not_exist
    def draft_path(self) -> AnyStr:
        """Directory for temporary intermediate files."""
        return self._in_build("draft")

    @property
    @create_if_not_exist
    def osm2ft_path(self) -> AnyStr:
        """Directory with osmId<->ftId mappings."""
        return self._in_build("osm2ft")

    @property
    @create_if_not_exist
    def coastline_path(self) -> AnyStr:
        """Directory used for a coastline generation."""
        return self._in_intermediate("coasts")

    @property
    @create_if_not_exist
    def coastline_tmp_path(self) -> AnyStr:
        """The "tmp" directory inside coastline_path."""
        return os.path.join(self.coastline_path, "tmp")

    @property
    @create_if_not_exist
    def status_path(self) -> AnyStr:
        """Directory with status files."""
        return self._in_build("status")

    @property
    @create_if_not_exist
    def descriptions_path(self) -> AnyStr:
        """The "descriptions" directory inside intermediate_data_path."""
        return self._in_intermediate("descriptions")

    @property
    @create_if_not_exist
    def stats_path(self) -> AnyStr:
        """The "stats" directory of the build."""
        return self._in_build("stats")

    @property
    @create_if_not_exist
    def transit_path(self) -> AnyStr:
        """Same directory as intermediate_data_path."""
        return self.intermediate_data_path

    # Paths that depend on settings; an empty string means "not configured".

    @property
    def transit_path_experimental(self) -> AnyStr:
        """Path of "transit_from_gtfs", or "" without settings.TRANSIT_URL."""
        if settings.TRANSIT_URL:
            return self._in_intermediate("transit_from_gtfs")
        return ""

    @property
    def world_roads_path(self) -> AnyStr:
        """
        Path of "world_roads.txt", or "" when
        settings.NEED_BUILD_WORLD_ROADS is not set.
        """
        if settings.NEED_BUILD_WORLD_ROADS:
            return self._in_intermediate("world_roads.txt")
        return ""

    # Files located directly in the build directory.

    @property
    def planet_osm_pbf(self) -> AnyStr:
        return self._in_build(f"{settings.PLANET}.osm.pbf")

    @property
    def planet_o5m(self) -> AnyStr:
        return self._in_build(f"{settings.PLANET}.o5m")

    @property
    def world_roads_o5m(self) -> AnyStr:
        return self._in_build("world_roads.o5m")

    @property
    def main_status_path(self) -> AnyStr:
        """Status file for the "stages" entry inside status_path."""
        return os.path.join(self.status_path, status.with_stat_ext("stages"))

    @property
    def packed_polygons_path(self) -> AnyStr:
        return self._in_mwm("packed_polygons.bin")

    @property
    def localads_path(self) -> AnyStr:
        return self._in_build(f"localads_{self.mwm_version}")

    @property
    def types_path(self) -> AnyStr:
        return self._in_resources("types.txt")

    @property
    def external_resources_path(self) -> AnyStr:
        return self._in_mwm("external_resources.txt")

    # Files located in the intermediate data directory.

    @property
    def id_to_wikidata_path(self) -> AnyStr:
        return self._in_intermediate("id_to_wikidata.csv")

    @property
    def wiki_url_path(self) -> AnyStr:
        return self._in_intermediate("wiki_urls.txt")

    @property
    def ugc_path(self) -> AnyStr:
        return self._in_intermediate("ugc_db.sqlite3")

    @property
    def hotels_path(self) -> AnyStr:
        return self._in_intermediate("hotels.csv")

    @property
    def promo_catalog_cities_path(self) -> AnyStr:
        return self._in_intermediate("promo_catalog_cities.json")

    @property
    def promo_catalog_countries_path(self) -> AnyStr:
        return self._in_intermediate("promo_catalog_countries.json")

    @property
    def popularity_path(self) -> AnyStr:
        return self._in_intermediate("popular_places.csv")

    @property
    def subway_path(self) -> AnyStr:
        return self._in_intermediate("mapsme_osm_subways.transit.json")

    @property
    def food_paths(self) -> AnyStr:
        return self._in_intermediate("ids_food.json")

    @property
    def food_translations_path(self) -> AnyStr:
        return self._in_intermediate("translations_food.json")

    @property
    def cities_boundaries_path(self) -> AnyStr:
        return self._in_intermediate("cities_boundaries.bin")

    # Files located in the user resource directory.

    @property
    def hierarchy_path(self) -> AnyStr:
        return self._in_resources("hierarchy.txt")

    @property
    def old_to_new_path(self) -> AnyStr:
        return self._in_resources("old_vs_new.csv")

    @property
    def borders_to_osm_path(self) -> AnyStr:
        return self._in_resources("borders_vs_osm.csv")

    @property
    def countries_synonyms_path(self) -> AnyStr:
        return self._in_resources("countries_synonyms.csv")

    @property
    def counties_txt_path(self) -> AnyStr:
        return self._in_mwm("countries.txt")

    @property
    def user_resource_path(self) -> AnyStr:
        return settings.USER_RESOURCE_PATH

    # Paths taken straight from settings; no instance is required.

    @staticmethod
    def srtm_path() -> AnyStr:
        return settings.SRTM_PATH

    @staticmethod
    def isolines_path() -> AnyStr:
        return settings.ISOLINES_PATH

    @staticmethod
    def addresses_path() -> AnyStr:
        return settings.ADDRESSES_PATH

    @staticmethod
    def borders_path() -> AnyStr:
        return os.path.join(settings.USER_RESOURCE_PATH, "borders")

    @staticmethod
    @create_if_not_exist
    def tmp_dir():
        """settings.TMPDIR; the directory is created on access."""
        return settings.TMPDIR
# Set of all map names (borders directory entries plus the world names),
# computed once when the module is imported.
COUNTRIES_NAMES = set(get_all_countries_list(PathProvider.borders_path()))
class Env:
    """
    Env provides a generation environment. It sets up instruments and paths,
    that are used for a maps generation. It stores state of the maps generation.
    """

    def __init__(
        self,
        countries: Optional[List[AnyStr]] = None,
        production: bool = False,
        build_name: Optional[AnyStr] = None,
        build_suffix: AnyStr = "",
        skipped_stages: Optional[Set[Type[Stage]]] = None,
        force_download_files: bool = False,
    ):
        """
        :param countries: maps to build; all known maps when None.
        :param production: stored as self.production.
        :param build_name: existing build name of the form
            "<%Y_%m_%d__%H_%M_%S>[-<suffix>]"; a new name is derived from the
            current time when None.
        :param build_suffix: suffix appended to a newly derived build name;
            replaced by the suffix parsed from build_name when one is given.
        :param skipped_stages: stage classes that must not run.
        :param force_download_files: when True, "*.download" files are removed
            from the status directory.
        :raises ValueError: if the date part of build_name cannot be parsed.
        """
        self.setup_logging()

        logger.info("Start setup ...")
        os.environ["TMPDIR"] = PathProvider.tmp_dir()
        # Every osm tool path becomes an attribute named after the tool.
        for k, v in self.setup_osm_tools().items():
            setattr(self, k, v)

        self.production = production
        self.force_download_files = force_download_files
        self.countries = countries
        self.skipped_stages = set() if skipped_stages is None else skipped_stages
        if self.countries is None:
            self.countries = get_all_countries_list(PathProvider.borders_path())

        self.node_storage = settings.NODE_STORAGE

        version_format = "%Y_%m_%d__%H_%M_%S"
        suffix_div = "-"
        self.dt = None
        if build_name is None:
            self.dt = datetime.datetime.now()
            build_name = self.dt.strftime(version_format)
            if build_suffix:
                build_name = f"{build_name}{suffix_div}{build_suffix}"
        else:
            # Everything after the first divider is the suffix.
            date_str, _, build_suffix = build_name.partition(suffix_div)
            self.dt = datetime.datetime.strptime(date_str, version_format)

        self.build_suffix = build_suffix
        self.mwm_version = self.dt.strftime("%y%m%d")
        # Seconds since the epoch. strftime("%s") is a platform extension and
        # is not available everywhere, so timestamp() is used instead; for a
        # naive datetime both interpret the value as local time.
        self.planet_version = str(int(self.dt.timestamp()))
        self.build_path = os.path.join(settings.MAIN_OUT_PATH, build_name)
        self.build_name = build_name

        self.gen_tool = self.setup_generator_tool()
        if WORLD_NAME in self.countries:
            self.world_roads_builder_tool = self.setup_world_roads_builder_tool()
        self.diff_tool = self.setup_mwm_diff_tool()

        logger.info(f"Build name is {self.build_name}.")
        logger.info(f"Build path is {self.build_path}.")

        self.paths = PathProvider(self.build_path, self.build_name, self.mwm_version)

        Version.write(self.build_path, self.planet_version)
        self.setup_borders()
        self.setup_osm2ft()

        if self.force_download_files:
            for item in os.listdir(self.paths.status_path):
                if item.endswith(".download"):
                    os.remove(os.path.join(self.paths.status_path, item))

        self.main_status = status.Status()
        # self.countries_meta stores log files and statuses for each country.
        self.countries_meta = collections.defaultdict(dict)
        self.subprocess_out = None
        self.subprocess_countries_out = {}

        printed_countries = ", ".join(self.countries)
        if len(self.countries) > 50:
            # Long lists are abbreviated to the first and last 25 names.
            printed_countries = (
                f"{', '.join(self.countries[:25])}, ..., "
                f"{', '.join(self.countries[-25:])}"
            )
        logger.info(
            f"The following {len(self.countries)} maps will build: "
            f"{printed_countries}."
        )
        logger.info("Finish setup")

    def __getitem__(self, item):
        """Return the instance attribute named item (env["name"] access)."""
        return self.__dict__[item]

    def get_tmp_mwm_names(self) -> List[AnyStr]:
        """
        Return the requested countries that have a "<name>.mwm.tmp" file in
        the intermediate tmp directory, in the order of self.countries.
        """
        tmp_ext = ".mwm.tmp"
        existing_names = set()
        for f in os.listdir(self.paths.intermediate_tmp_path):
            path = os.path.join(self.paths.intermediate_tmp_path, f)
            if f.endswith(tmp_ext) and os.path.isfile(path):
                # Strip only the trailing extension, so a name that contains
                # ".mwm.tmp" somewhere else is not mangled.
                name = f[: -len(tmp_ext)]
                if name in self.countries:
                    existing_names.add(name)
        return [c for c in self.countries if c in existing_names]

    def add_skipped_stage(self, stage: Union[Type[Stage], Stage]):
        """Mark a stage (class or instance) as skipped."""
        if isinstance(stage, Stage):
            stage = stage.__class__
        self.skipped_stages.add(stage)

    def is_accepted_stage(self, stage: Union[Type[Stage], Stage]) -> bool:
        """Return True if the stage (class or instance) is not skipped."""
        if isinstance(stage, Stage):
            stage = stage.__class__
        return stage not in self.skipped_stages

    def finish(self):
        """Finish the main status."""
        self.main_status.finish()

    def finish_mwm(self, mwm_name: AnyStr):
        """Finish the status stored for mwm_name in countries_meta."""
        self.countries_meta[mwm_name]["status"].finish()

    def set_subprocess_out(self, subprocess_out: Any, country: Optional[AnyStr] = None):
        """Set the subprocess output: the common one, or the one of country."""
        if country is None:
            self.subprocess_out = subprocess_out
        else:
            self.subprocess_countries_out[country] = subprocess_out

    def get_subprocess_out(self, country: Optional[AnyStr] = None):
        """
        Return the subprocess output: the common one, or the one of country.

        :raises KeyError: if no output was set for country.
        """
        if country is None:
            return self.subprocess_out
        else:
            return self.subprocess_countries_out[country]

    @staticmethod
    def setup_logging():
        """Apply settings.LOGGING and log uncaught exceptions via logger."""

        def exception_handler(exc_type, value, tb):
            logger.exception(
                f"Uncaught exception: {str(value)}", exc_info=(exc_type, value, tb)
            )

        logging.config.dictConfig(settings.LOGGING)
        sys.excepthook = exception_handler

    @staticmethod
    def setup_generator_tool() -> AnyStr:
        """
        Find the generator tool.

        Each name from settings.POSSIBLE_GEN_TOOL_NAMES is looked up on PATH
        first and in settings.BUILD_PATH second; the first hit is returned.

        :raises Exception: carrying the collected lookup errors when no name
            could be found.
        """
        logger.info("Check generator tool ...")
        exceptions = []
        for gen_tool in settings.POSSIBLE_GEN_TOOL_NAMES:
            gen_tool_path = shutil.which(gen_tool)
            if gen_tool_path is None:
                logger.info(f"Looking for generator tool in {settings.BUILD_PATH} ...")
                try:
                    gen_tool_path = find_executable(settings.BUILD_PATH, gen_tool)
                except FileNotFoundError as e:
                    exceptions.append(e)
                    continue

            logger.info(f"Generator tool found - {gen_tool_path}")
            return gen_tool_path

        raise Exception(exceptions)

    @staticmethod
    def setup_world_roads_builder_tool() -> AnyStr:
        """Find world_roads_builder_tool in settings.BUILD_PATH."""
        logger.info(f"Check world_roads_builder_tool. Looking for it in {settings.BUILD_PATH} ...")
        world_roads_builder_tool_path = find_executable(settings.BUILD_PATH, "world_roads_builder_tool")
        logger.info(f"world_roads_builder_tool found - {world_roads_builder_tool_path}")
        return world_roads_builder_tool_path

    @staticmethod
    def setup_mwm_diff_tool() -> AnyStr:
        """Find mwm_diff_tool in settings.BUILD_PATH."""
        logger.info(f"Check mwm_diff_tool. Looking for it in {settings.BUILD_PATH} ...")
        mwm_diff_tool_path = find_executable(settings.BUILD_PATH, "mwm_diff_tool")
        logger.info(f"mwm_diff_tool found - {mwm_diff_tool_path}")
        return mwm_diff_tool_path

    @staticmethod
    def setup_osm_tools() -> Dict[AnyStr, AnyStr]:
        """
        Locate the osmctools binaries, building them when they are missing.

        The configured settings.OSM_TOOLS_PATH is checked first, then a
        system-wide installation; if neither provides all tools they are
        built from settings.OSM_TOOLS_SRC_PATH.

        :return: mapping of tool name to executable path.
        """
        path = settings.OSM_TOOLS_PATH
        osm_tool_names = [
            settings.OSM_TOOL_CONVERT,
            settings.OSM_TOOL_UPDATE,
            settings.OSM_TOOL_FILTER,
        ]

        logger.info("Check for the osmctools binaries...")

        # Check in the configured path first.
        tmp_paths = [os.path.join(path, t) for t in osm_tool_names]
        if not all(is_executable(t) for t in tmp_paths):
            # Or use a system-wide installation.
            tmp_paths = [shutil.which(t) for t in osm_tool_names]
        # shutil.which() returns None for a missing tool; such entries must
        # not reach is_executable().
        if all(t is not None and is_executable(t) for t in tmp_paths):
            osm_tool_paths = dict(zip(osm_tool_names, tmp_paths))
            logger.info(f"Found osmctools at {', '.join(osm_tool_paths.values())}")
            return osm_tool_paths

        logger.info(f"osmctools are not found, building from the sources into {path}...")
        os.makedirs(path, exist_ok=True)
        return build_osmtools(settings.OSM_TOOLS_SRC_PATH)

    def setup_borders(self):
        """
        Symlink the .poly file of every requested country (world maps
        excluded) into the generation borders directory, and symlink that
        directory as "borders" inside the draft directory.
        """
        temp_borders = self.paths.generation_borders_path
        borders = PathProvider.borders_path()
        for x in self.countries:
            if x in WORLDS_NAMES:
                continue

            poly = f"{x}.poly"
            make_symlink(os.path.join(borders, poly), os.path.join(temp_borders, poly))
        make_symlink(temp_borders, os.path.join(self.paths.draft_path, "borders"))

    def setup_osm2ft(self):
        """Move every "*.mwm.osm2ft" file from osm2ft_path into mwm_path."""
        for x in os.listdir(self.paths.osm2ft_path):
            p = os.path.join(self.paths.osm2ft_path, x)
            if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
                shutil.move(p, os.path.join(self.paths.mwm_path, x))

View file

@ -0,0 +1,58 @@
import os
import subprocess
class MapsGeneratorError(Exception):
    """Base class of the maps generator exceptions."""


class OptionNotFound(MapsGeneratorError):
    """An option name is not known."""


class ValidationError(MapsGeneratorError):
    """A value failed validation."""


class ContinueError(MapsGeneratorError):
    """A generation cannot be continued."""


class SkipError(MapsGeneratorError):
    """Error related to skipping; NOTE(review): usage is not visible here."""


class BadExitStatusError(MapsGeneratorError):
    """A launched process finished with a bad exit status."""


class ParseError(MapsGeneratorError):
    """Error related to parsing; NOTE(review): usage is not visible here."""


class FailedTest(MapsGeneratorError):
    """A test failed; NOTE(review): usage is not visible here."""
def wait_and_raise_if_fail(p):
    """
    Wait for p to finish and raise if its exit code is not os.EX_OK.

    p is either a subprocess.Popen or an object exposing wait(), args, output
    and error. For a Popen, up to 256 characters of each captured pipe are
    quoted in the error message; for other objects the names of the output
    and error targets are reported.

    :param p: process-like object to wait for.
    :raises BadExitStatusError: if the exit code differs from os.EX_OK.
    """
    if p.wait() == os.EX_OK:
        return

    def head(stream):
        # Popen.stdout / Popen.stderr are None unless a pipe was requested.
        if stream is None:
            return None
        chunk = stream.read(256)
        # The 256-byte cut may split a multi-byte character, so decoding must
        # not fail; text-mode pipes already deliver str.
        return chunk.decode(errors="replace") if isinstance(chunk, bytes) else chunk

    def target_name(target):
        # Targets may be file objects (with .name) as well as None or
        # subprocess.DEVNULL, which have no name.
        name = getattr(target, "name", None)
        return str(target if name is None else name)

    if isinstance(p, subprocess.Popen):
        logs = head(p.stdout)
        errors = head(p.stderr)
        if logs is None:
            details = errors
        elif errors is None or errors == logs:
            details = logs
        else:
            details = logs + " and " + errors
    else:
        details = target_name(p.output)
        error_name = target_name(p.error)
        if error_name != details:
            details += " and " + error_name

    # Work on a copy: the argument list of the process must stay intact.
    args = p.args
    if isinstance(args, (str, bytes, os.PathLike)):
        args = [args]
    executable, *arguments = [str(a) for a in args]

    msg = f"The launch of {executable} failed.\nArguments used: {' '.join(arguments)}\nSee details in {details}"
    raise BadExitStatusError(msg)

View file

@ -0,0 +1,162 @@
import copy
import logging
import os
import subprocess
from maps_generator.generator.exceptions import OptionNotFound
from maps_generator.generator.exceptions import ValidationError
from maps_generator.generator.exceptions import wait_and_raise_if_fail
# Default logger of GenTool; a different one can be passed as the "logger" option.
logger = logging.getLogger("maps_generator")
class GenTool:
    """
    Wrapper around the generator tool executable.

    Options are collected as keyword arguments, validated against OPTIONS and
    turned into "--name=value" command-line flags.
    """

    # Known option names mapped to the exact type their value must have.
    OPTIONS = {
        "dump_cities_boundaries": bool,
        "emit_coasts": bool,
        "fail_on_coasts": bool,
        "generate_cameras": bool,
        "generate_cities_boundaries": bool,
        "generate_cities_ids": bool,
        "generate_features": bool,
        "generate_geo_objects_features": bool,
        "generate_geo_objects_index": bool,
        "generate_geometry": bool,
        "generate_index": bool,
        "generate_isolines_info": bool,
        "generate_maxspeed": bool,
        "generate_packed_borders": bool,
        "generate_popular_places": bool,
        "generate_region_features": bool,
        "generate_regions": bool,
        "generate_regions_kv": bool,
        "generate_search_index": bool,
        "generate_traffic_keys": bool,
        "generate_world": bool,
        "have_borders_for_whole_world": bool,
        "make_city_roads": bool,
        "make_coasts": bool,
        "make_cross_mwm": bool,
        "make_routing_index": bool,
        "make_transit_cross_mwm": bool,
        "make_transit_cross_mwm_experimental": bool,
        "preprocess": bool,
        "split_by_polygons": bool,
        "stats_types": bool,
        "version": bool,
        "threads_count": int,
        "booking_data": str,
        "promo_catalog_cities": str,
        "brands_data": str,
        "brands_translations_data": str,
        "cache_path": str,
        "cities_boundaries_data": str,
        "data_path": str,
        "dump_wikipedia_urls": str,
        "geo_objects_features": str,
        "geo_objects_key_value": str,
        "ids_without_addresses": str,
        "idToWikidata": str,
        "intermediate_data_path": str,
        "isolines_path": str,
        "addresses_path": str,
        "nodes_list_path": str,
        "node_storage": str,
        "osm_file_name": str,
        "osm_file_type": str,
        "output": str,
        "planet_version": str,
        "popular_places_data": str,
        "regions_features": str,
        "regions_index": str,
        "regions_key_value": str,
        "srtm_path": str,
        "transit_path": str,
        "transit_path_experimental": str,
        "world_roads_path": str,
        "ugc_data": str,
        "uk_postcodes_dataset": str,
        "us_postcodes_dataset": str,
        "user_resource_path": str,
        "wikipedia_pages": str,
    }

    def __init__(
        self, name_executable, out=subprocess.DEVNULL, err=subprocess.DEVNULL, **options
    ):
        """
        :param name_executable: generator tool executable.
        :param out: stdout target of the launched process.
        :param err: stderr target of the launched process.
        :param options: initial options, see add_options().
        """
        self.name_executable = name_executable
        self.subprocess = None
        self.output = out
        self.error = err
        self.options = {"threads_count": 1}
        self.logger = logger

        self.add_options(**options)

    @property
    def args(self):
        """The full command line as a list."""
        return self._collect_cmd()

    def add_options(self, **options):
        """
        Validate and store options; returns self.

        The special "logger" key replaces the logger instead of becoming a
        command-line option. Boolean values are stored as "true"/"false".

        :raises OptionNotFound: for a name missing from OPTIONS.
        :raises ValidationError: when a value is not exactly of the type
            required by OPTIONS.
        """
        if "logger" in options:
            self.logger = options["logger"]

        for name, value in options.items():
            if name == "logger":
                continue

            if name not in GenTool.OPTIONS:
                raise OptionNotFound(f"{name} is unavailable option")

            expected = GenTool.OPTIONS[name]
            # Exact type match on purpose: bool must not pass as int.
            if type(value) is not expected:
                raise ValidationError(
                    f"{name} required {str(expected)},"
                    f" but not {str(type(value))}"
                )

            if type(value) is bool:
                self.options[name] = str(value).lower()
            else:
                self.options[name] = value
        return self

    def run_async(self):
        """Launch the tool without waiting for it; returns self."""
        assert self.subprocess is None, "You forgot to call wait()"
        cmd = self._collect_cmd()
        self.subprocess = subprocess.Popen(
            cmd, stdout=self.output, stderr=self.error, env=os.environ
        )

        version = self.get_build_version()
        self.logger.info(f"Run generator tool [{version}]: {' '.join(cmd)} ")
        return self

    def wait(self):
        """Wait for the launched process and return its exit code."""
        exit_code = self.subprocess.wait()
        self.subprocess = None
        return exit_code

    def run(self):
        """Launch the tool and wait for it, raising on a bad exit status."""
        self.run_async()
        wait_and_raise_if_fail(self)

    def branch(self):
        """Return a new GenTool with the same executable, outputs and a deep copy of the options."""
        clone = GenTool(self.name_executable, out=self.output, err=self.error)
        clone.options = copy.deepcopy(self.options)
        return clone

    def get_build_version(self):
        """Run the tool with --version and return its output on one line."""
        version_process = subprocess.Popen(
            [self.name_executable, "--version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=os.environ,
        )
        wait_and_raise_if_fail(version_process)
        out, err = version_process.communicate()
        return out.decode("utf-8").replace("\n", " ").strip()

    def _collect_cmd(self):
        """Build the command line: the executable followed by all options."""
        cmd = [self.name_executable]
        for name, value in self.options.items():
            cmd.append("--" + name + "=" + str(value))
        return cmd
def run_gen_tool(*args, **kwargs):
    """Create a GenTool from the given arguments and run it to completion."""
    tool = GenTool(*args, **kwargs)
    tool.run()

View file

@ -0,0 +1,151 @@
import os
from typing import AnyStr
from typing import List
from typing import Optional
from typing import Type
from typing import Union
import filelock
from maps_generator.generator.env import Env
from maps_generator.generator.exceptions import ContinueError
from maps_generator.generator.stages import Stage
from maps_generator.generator.stages import get_stage_name
from maps_generator.generator.stages import stages
from maps_generator.generator.status import Status
from maps_generator.generator.status import without_stat_ext
class Generation:
    """
    Generation describes process of a map generation. It contains stages.

    For example:
      generation = Generation(env)
      generation.add_stage(s1)
      generation.add_stage(s2)
      generation.run()
    """

    def __init__(self, env: Env, build_lock: bool = True):
        """
        :param env: generation environment.
        :param build_lock: when True, run() holds a file lock in the build
            directory while the stages are running.
        """
        self.env: Env = env
        self.stages: List[Stage] = []
        self.runnable_stages: Optional[List[Stage]] = None
        self.build_lock: bool = build_lock

        # Register every known stage that has to be skipped in this setup.
        for country_stage in stages.countries_stages:
            self._skip_if_needed(country_stage)

        for stage in stages.stages:
            self._skip_if_needed(stage)

    def _skip_if_needed(self, stage: Union[Type[Stage], Stage]):
        """Record the stage as skipped in env when is_skipped_stage says so."""
        if self.is_skipped_stage(stage):
            self.env.add_skipped_stage(stage)

    def is_skipped_stage(self, stage: Union[Type[Stage], Stage]) -> bool:
        """
        Return True for a production-only stage outside of production and for
        a stage that env does not accept.
        """
        if stage.is_production_only and not self.env.production:
            return True
        return not self.env.is_accepted_stage(stage)

    def add_stage(self, stage: Stage):
        """Append a stage to the generation."""
        self.stages.append(stage)
        self._skip_if_needed(stage)

    def pre_run(self):
        """
        Extend the skipped stages with everything reachable from them through
        stages.dependencies, then compute runnable_stages.
        """
        reachable = set()

        def collect(current: Type[Stage]):
            for dependency in stages.dependencies.get(current, []):
                reachable.add(dependency)
                collect(dependency)

        for skipped_stage in self.env.skipped_stages:
            collect(skipped_stage)

        # Added only after the walk, so the set is not changed while iterated.
        for stage in reachable:
            self.env.add_skipped_stage(stage)

        self.runnable_stages = [
            stage for stage in self.stages if self.env.is_accepted_stage(stage)
        ]

    def run(self, from_stage: Optional[AnyStr] = None):
        """
        Run the generation.

        :param from_stage: name of the stage to reset the state to before
            running; no reset when None.
        """
        self.pre_run()
        if from_stage is not None:
            self.reset_to_stage(from_stage)

        if not self.build_lock:
            self.run_stages()
            return

        lock_filename = f"{os.path.join(self.env.paths.build_path, 'lock')}.lock"
        with filelock.FileLock(lock_filename, timeout=1):
            self.run_stages()

    def run_stages(self):
        """Call every runnable stage with env, in order."""
        for stage in self.runnable_stages:
            stage(self.env)

    def reset_to_stage(self, stage_name: AnyStr):
        """
        Resets generation state to stage_name.
        Status files are overwritten new statuses according stage_name.
        It supposes that stages have next representation:
        stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN

        :raises ContinueError: if stage_name is unknown or the status
            directory / main status file does not exist.
        """
        paths = self.env.paths
        high_level_stages = [get_stage_name(s) for s in self.runnable_stages]

        is_high_level = stage_name in high_level_stages
        if not is_high_level and not any(
            stage_name == get_stage_name(s) for s in stages.countries_stages
        ):
            raise ContinueError(f"{stage_name} not in {', '.join(high_level_stages)}.")

        if not os.path.exists(paths.status_path):
            raise ContinueError(f"Status path {paths.status_path} not found.")

        if not os.path.exists(paths.main_status_path):
            raise ContinueError(
                f"Status file {paths.main_status_path} not found."
            )

        # Status files that belong to the requested countries.
        countries = set(self.env.countries)
        countries_statuses_paths = []
        for entry in os.listdir(paths.status_path):
            full_name = os.path.join(paths.status_path, entry)
            if not os.path.isfile(full_name):
                continue
            if full_name == paths.main_status_path:
                continue
            if without_stat_ext(entry) not in countries:
                continue
            countries_statuses_paths.append(full_name)

        def set_countries_stage(st):
            for path in countries_statuses_paths:
                Status(path, st).update_status()

        def finish_countries_stage():
            for path in countries_statuses_paths:
                Status(path).finish()

        mwm_stage_name = get_stage_name(stages.mwm_stage)
        if mwm_stage_name in high_level_stages:
            stage_mwm_index = high_level_stages.index(mwm_stage_name)
        else:
            stage_mwm_index = -1

        if (
            stage_mwm_index == -1
            or stage_name in high_level_stages[: stage_mwm_index + 1]
        ):
            # The stage lies before (or is) the mwm stage: countries restart.
            main_status = stage_name
            set_countries_stage("")
        elif stage_name in high_level_stages[stage_mwm_index + 1 :]:
            # The stage lies after the mwm stage: countries are done.
            main_status = stage_name
            finish_countries_stage()
        else:
            # The stage is a country stage inside the mwm stage.
            main_status = get_stage_name(stages.mwm_stage)
            set_countries_stage(stage_name)

        Status(paths.main_status_path, main_status).update_status()

View file

@ -0,0 +1,121 @@
import os
import subprocess
from maps_generator.generator import settings
from maps_generator.generator.exceptions import BadExitStatusError
from maps_generator.generator.exceptions import wait_and_raise_if_fail
def build_osmtools(path, output=subprocess.DEVNULL, error=subprocess.DEVNULL):
    """
    Compile the osm tools from the sources located in path.

    All compilers are started first and awaited afterwards. The binaries are
    written to settings.OSM_TOOLS_PATH.

    :param path: directory with osmupdate.c, osmfilter.c and osmconvert.c.
    :param output: stdout target of the compiler processes.
    :param error: stderr target of the compiler processes.
    :return: mapping of tool name to the path of the built binary.
    :raises BadExitStatusError: listing every compiler launch that failed.
    """
    sources = {
        settings.OSM_TOOL_UPDATE: "osmupdate.c",
        settings.OSM_TOOL_FILTER: "osmfilter.c",
        settings.OSM_TOOL_CONVERT: "osmconvert.c",
    }
    ld_flags = ("-lz",)

    compilers = []
    result = {}
    for executable, source_file in sources.items():
        binary = os.path.join(settings.OSM_TOOLS_PATH, executable)
        command = [
            settings.OSM_TOOLS_CC,
            *settings.OSM_TOOLS_CC_FLAGS,
            "-o",
            binary,
            os.path.join(path, source_file),
            *ld_flags,
        ]
        compilers.append(subprocess.Popen(command, stdout=output, stderr=error))
        result[executable] = binary

    # Every compiler is awaited, so all failures are reported together.
    messages = [
        f"The launch of {' '.join(compiler.args)} failed."
        for compiler in compilers
        if compiler.wait() != os.EX_OK
    ]
    if messages:
        raise BadExitStatusError("\n".join(messages))

    return result
def osmconvert(
    name_executable,
    in_file,
    out_file,
    output=subprocess.DEVNULL,
    error=subprocess.DEVNULL,
    run_async=False,
    **kwargs,
):
    """
    Convert in_file to the o5m format with osmconvert, dropping author and
    version information.

    :param name_executable: osmconvert executable to launch.
    :param in_file: input file.
    :param out_file: output file, passed as "-o=<out_file>".
    :param output: stdout target of the subprocess.
    :param error: stderr target of the subprocess.
    :param run_async: when True, return the started process without waiting.
    :param kwargs: accepted and ignored.
    :return: the subprocess.Popen object if run_async, otherwise None.
    :raises BadExitStatusError: in synchronous mode, on a bad exit status.
    """
    env = os.environ.copy()
    # Put the osm tools directory first on the child's PATH. PATH may be
    # unset, and the separator is platform specific.
    search_path = [settings.OSM_TOOLS_PATH]
    if env.get("PATH"):
        search_path.append(env["PATH"])
    env["PATH"] = os.pathsep.join(search_path)

    p = subprocess.Popen(
        [
            name_executable,
            in_file,
            "--drop-author",
            "--drop-version",
            "--out-o5m",
            f"-o={out_file}",
        ],
        env=env,
        stdout=output,
        stderr=error,
    )
    if run_async:
        return p
    else:
        wait_and_raise_if_fail(p)
def osmupdate(
    name_executable,
    in_file,
    out_file,
    output=subprocess.DEVNULL,
    error=subprocess.DEVNULL,
    run_async=False,
    **kwargs,
):
    """
    Run osmupdate on in_file and write the o5m result to out_file, dropping
    author and version information.

    :param name_executable: osmupdate executable to launch.
    :param in_file: input file.
    :param out_file: output file.
    :param output: stdout target of the subprocess.
    :param error: stderr target of the subprocess.
    :param run_async: when True, return the started process without waiting.
    :param kwargs: accepted and ignored.
    :return: the subprocess.Popen object if run_async, otherwise None.
    :raises BadExitStatusError: in synchronous mode, on a bad exit status.
    """
    env = os.environ.copy()
    # Put the osm tools directory first on the child's PATH. PATH may be
    # unset, and the separator is platform specific.
    search_path = [settings.OSM_TOOLS_PATH]
    if env.get("PATH"):
        search_path.append(env["PATH"])
    env["PATH"] = os.pathsep.join(search_path)

    p = subprocess.Popen(
        [
            name_executable,
            "--drop-author",
            "--drop-version",
            "--out-o5m",
            "-v",
            in_file,
            out_file,
        ],
        env=env,
        stdout=output,
        stderr=error,
    )
    if run_async:
        return p
    else:
        wait_and_raise_if_fail(p)
def osmfilter(
    name_executable,
    in_file,
    out_file,
    output=subprocess.DEVNULL,
    error=subprocess.DEVNULL,
    run_async=False,
    **kwargs,
):
    """
    Run osmfilter on in_file and write the result to out_file.

    Every extra keyword argument becomes a command-line flag: underscores in
    the name are replaced with dashes and the value is appended after "=",
    e.g. keep_ways="natural=coastline" gives "--keep-ways=natural=coastline".
    Flags are emitted in the order the keyword arguments were given.

    :param name_executable: osmfilter executable to launch.
    :param in_file: input file.
    :param out_file: output file, passed as "-o=<out_file>".
    :param output: stdout target of the subprocess.
    :param error: stderr target of the subprocess.
    :param run_async: when True, return the started process without waiting.
    :return: the subprocess.Popen object if run_async, otherwise None.
    :raises BadExitStatusError: in synchronous mode, on a bad exit status.
    """
    env = os.environ.copy()
    # Put the osm tools directory first on the child's PATH. PATH may be
    # unset, and the separator is platform specific.
    search_path = [settings.OSM_TOOLS_PATH]
    if env.get("PATH"):
        search_path.append(env["PATH"])
    env["PATH"] = os.pathsep.join(search_path)

    args = [name_executable, in_file, f"-o={out_file}"] + [
        f"--{k.replace('_', '-')}={v}" for k, v in kwargs.items()
    ]
    p = subprocess.Popen(args, env=env, stdout=output, stderr=error)
    if run_async:
        return p
    else:
        wait_and_raise_if_fail(p)

View file

@ -0,0 +1,333 @@
import argparse
import multiprocessing
import os
import site
import sys
from configparser import ConfigParser
from configparser import ExtendedInterpolation
from pathlib import Path
from typing import Any
from typing import AnyStr
from maps_generator.utils.md5 import md5_ext
from maps_generator.utils.system import total_virtual_memory
# Directory "../var/etc" relative to this module; used below as the location
# of stats_types_config.txt.
ETC_DIR = os.path.join(os.path.dirname(__file__), "..", "var", "etc")

# Minimal parser that only understands the config option; it is used by
# get_config_path() to parse just the config-related slice of sys.argv.
parser = argparse.ArgumentParser(add_help=False)
opt_config = "--config"
parser.add_argument(opt_config, type=str, default="", help="Path to config")
def get_config_path(config_path: AnyStr):
    """
    Resolve the path of the config file.

    The lookup order is:
    1. The opt_config command-line option, given either as "--config=PATH"
       or as "--config PATH" (the last occurrence in sys.argv wins).
    2. The MM_GEN__CONFIG environment variable.
    3. The config_path argument.
    """
    cli_args = sys.argv
    # (-1, -1) yields an empty slice, i.e. "option not found".
    start, stop = -1, -1
    for position, arg in enumerate(cli_args):
        if arg.startswith(f"{opt_config}="):
            # "--config=PATH": option and value are a single argument.
            start, stop = position, position + 1
        elif arg == opt_config:
            # "--config PATH": the value is the next argument.
            start, stop = position, position + 2

    selected = cli_args[start:stop]
    if selected:
        return parser.parse_args(selected).config

    from_env = os.environ.get("MM_GEN__CONFIG")
    if from_env is None:
        return config_path
    return from_env
class CfgReader:
    """
    Config reader.

    An option is looked up in three places, in priority order:
    1. The process environment.
    2. The config file.
    3. The default value supplied by the caller.

    The environment variable of an option is named
    MM_GEN__ + [SECTION_NAME] + _ + [VALUE_NAME], with the section and the
    value names upper-cased.
    """

    def __init__(self, default_settings_path: AnyStr):
        """Read the config file chosen by get_config_path()."""
        self.config = ConfigParser(interpolation=ExtendedInterpolation())
        config_file = get_config_path(default_settings_path)
        self.config.read([config_file])

    def get_opt(self, s: AnyStr, v: AnyStr, default: Any = None):
        """
        Return option `v` of section `s`.

        Values coming from the environment or from the config file are
        strings; `default` is returned unchanged when neither defines it.
        """
        from_env = CfgReader._get_env_val(s, v)
        if from_env is not None:
            return from_env
        if self.config.has_option(s, v):
            return self.config.get(s, v)
        return default

    def get_opt_path(self, s: AnyStr, v: AnyStr, default: AnyStr = ""):
        """Same as get_opt(), with a leading "~" expanded to the home dir."""
        raw_value = self.get_opt(s, v, default)
        return os.path.expanduser(raw_value)

    @staticmethod
    def _get_env_val(s: AnyStr, v: AnyStr):
        """Return the environment override for the option, or None."""
        variable = f"MM_GEN__{s.upper()}_{v.upper()}"
        return os.environ.get(variable)
# Default values of all settings. init() below overrides most of them with
# values from the environment or from the config file.
DEFAULT_PLANET_URL = "https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf"

# Main section:
# If DEBUG is True, a little special planet is downloaded.
DEBUG = True
_HOME_PATH = str(Path.home())
_WORK_PATH = _HOME_PATH
TMPDIR = os.path.join(_HOME_PATH, "tmp")
MAIN_OUT_PATH = os.path.join(_WORK_PATH, "generation")
CACHE_PATH = ""

# Developer section:
BUILD_PATH = os.path.join(_WORK_PATH, "omim-build-relwithdebinfo")
OMIM_PATH = os.path.join(_WORK_PATH, "omim")

# Osm tools section:
OSM_TOOLS_SRC_PATH = os.path.join(OMIM_PATH, "tools", "osmctools")
OSM_TOOLS_PATH = os.path.join(_WORK_PATH, "osmctools")

# Generator tool section:
USER_RESOURCE_PATH = os.path.join(OMIM_PATH, "data")
NODE_STORAGE = "map"

# Stages section:
NEED_PLANET_UPDATE = False
THREADS_COUNT_FEATURES_STAGE = multiprocessing.cpu_count()
DATA_ARCHIVE_DIR = ""
DIFF_VERSION_DEPTH = 2

# Logging section:
LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log")

# External resources section:
PLANET_URL = DEFAULT_PLANET_URL
PLANET_COASTS_URL = ""
UGC_URL = ""
HOTELS_URL = ""
PROMO_CATALOG_CITIES_URL = ""
PROMO_CATALOG_COUNTRIES_URL = ""
POPULARITY_URL = ""
SUBWAY_URL = ""
TRANSIT_URL = ""
NEED_BUILD_WORLD_ROADS = True
FOOD_URL = ""
FOOD_TRANSLATIONS_URL = ""
UK_POSTCODES_URL = ""
US_POSTCODES_URL = ""
SRTM_PATH = ""
ISOLINES_PATH = ""
ADDRESSES_PATH = ""

# Stats section:
STATS_TYPES_CONFIG = os.path.join(ETC_DIR, "stats_types_config.txt")

# Other variables:
PLANET = "planet"
POSSIBLE_GEN_TOOL_NAMES = ("generator_tool", "omim-generator_tool")
VERSION_FILE_NAME = "version.txt"

# Osm tools:
# Tool names; build_osmtools-style code uses them as executable names.
OSM_TOOL_CONVERT = "osmconvert"
OSM_TOOL_FILTER = "osmfilter"
OSM_TOOL_UPDATE = "osmupdate"
# C compiler and flags used to build the osm tools.
OSM_TOOLS_CC = "cc"
OSM_TOOLS_CC_FLAGS = [
    "-O3",
]

# Planet and coasts:
# Derived from PLANET_COASTS_URL; init() recomputes both after reading the
# config.
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
PLANET_COASTS_RAWGEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.rawgeom")

# Common:
THREADS_COUNT = multiprocessing.cpu_count()

# for lib logging
# Dict in the logging "dictConfig" schema (version 1). init() replaces the
# file handler's "filename" with the configured LOG_FILE_PATH.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "standard": {"format": "[%(asctime)s] %(levelname)s %(module)s %(message)s"},
    },
    "handlers": {
        "stdout": {
            "level": "INFO",
            "class": "logging.StreamHandler",
            "formatter": "standard",
        },
        "file": {
            "level": "DEBUG",
            "class": "logging.handlers.WatchedFileHandler",
            "formatter": "standard",
            "filename": LOG_FILE_PATH,
        },
    },
    "loggers": {
        "maps_generator": {
            "handlers": ["stdout", "file"],
            "level": "DEBUG",
            "propagate": True,
        }
    },
}
def init(default_settings_path: AnyStr):
    """
    Load the configuration and override the module-level default settings.

    Every setting is resolved through CfgReader, i.e. environment first, then
    the config file, then the current module-level value. The function
    rebinds the module globals in place and also creates the directory of the
    log file.

    :param default_settings_path: config path used when neither the
        command line nor the environment names one (see get_config_path).
    :raises AssertionError: if OMIM_PATH does not exist, or if the fallback
        lookup of the user resources / borders initialization fails.
    """
    # Try to read a config and to overload default settings
    cfg = CfgReader(default_settings_path)

    # Main section:
    global DEBUG
    global TMPDIR
    global MAIN_OUT_PATH
    global CACHE_PATH
    _DEBUG = cfg.get_opt("Main", "DEBUG")
    # The configured value is a string, so it is converted with int().
    DEBUG = DEBUG if _DEBUG is None else int(_DEBUG)
    TMPDIR = cfg.get_opt_path("Main", "TMPDIR", TMPDIR)
    MAIN_OUT_PATH = cfg.get_opt_path("Main", "MAIN_OUT_PATH", MAIN_OUT_PATH)
    CACHE_PATH = cfg.get_opt_path("Main", "CACHE_PATH", CACHE_PATH)

    # Developer section:
    global BUILD_PATH
    global OMIM_PATH
    BUILD_PATH = cfg.get_opt_path("Developer", "BUILD_PATH", BUILD_PATH)
    OMIM_PATH = cfg.get_opt_path("Developer", "OMIM_PATH", OMIM_PATH)

    # Osm tools section:
    global OSM_TOOLS_SRC_PATH
    global OSM_TOOLS_PATH
    OSM_TOOLS_SRC_PATH = cfg.get_opt_path(
        "Osm tools", "OSM_TOOLS_SRC_PATH", OSM_TOOLS_SRC_PATH
    )
    OSM_TOOLS_PATH = cfg.get_opt_path("Osm tools", "OSM_TOOLS_PATH", OSM_TOOLS_PATH)

    # Generator tool section:
    global USER_RESOURCE_PATH
    global NODE_STORAGE
    USER_RESOURCE_PATH = cfg.get_opt_path(
        "Generator tool", "USER_RESOURCE_PATH", USER_RESOURCE_PATH
    )
    NODE_STORAGE = cfg.get_opt("Generator tool", "NODE_STORAGE", NODE_STORAGE)

    assert os.path.exists(OMIM_PATH) is True, f"Can't find OMIM_PATH (set to {OMIM_PATH})"

    # Fall back to the data files located by the data_files package when the
    # configured resource path does not exist.
    # NOTE(review): the nesting of `import borders` / `assert borders.init()`
    # inside this branch is reconstructed - confirm against the original file.
    if not os.path.exists(USER_RESOURCE_PATH):
        from data_files import find_data_files

        USER_RESOURCE_PATH = find_data_files("omim-data")
        assert USER_RESOURCE_PATH is not None

        import borders

        # Issue: If maps_generator is installed in your system as a system
        # package and borders.init() is called first time, call borders.init()
        # might return False, because you need root permission.
        assert borders.init()

    # Stages section:
    global NEED_PLANET_UPDATE
    global DATA_ARCHIVE_DIR
    global DIFF_VERSION_DEPTH
    global THREADS_COUNT_FEATURES_STAGE
    # NOTE(review): unlike DEBUG, a configured value stays a string here, so
    # e.g. "0" or "false" is truthy - confirm how callers interpret it.
    NEED_PLANET_UPDATE = cfg.get_opt("Stages", "NEED_PLANET_UPDATE", NEED_PLANET_UPDATE)
    DATA_ARCHIVE_DIR = cfg.get_opt_path(
        "Stages", "DATA_ARCHIVE_DIR", DATA_ARCHIVE_DIR
    )
    DIFF_VERSION_DEPTH = int(cfg.get_opt(
        "Stages", "DIFF_VERSION_DEPTH", DIFF_VERSION_DEPTH
    ))

    threads_count = int(
        cfg.get_opt(
            "Generator tool",
            "THREADS_COUNT_FEATURES_STAGE",
            THREADS_COUNT_FEATURES_STAGE,
        )
    )
    # Non-positive values keep the default (the CPU count).
    if threads_count > 0:
        THREADS_COUNT_FEATURES_STAGE = threads_count

    # Logging section:
    global LOG_FILE_PATH
    global LOGGING
    # Recompute the default from the possibly overridden MAIN_OUT_PATH before
    # applying the explicit "MAIN_LOG" option.
    LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log")
    LOG_FILE_PATH = cfg.get_opt_path("Logging", "MAIN_LOG", LOG_FILE_PATH)
    os.makedirs(os.path.dirname(os.path.abspath(LOG_FILE_PATH)), exist_ok=True)
    LOGGING["handlers"]["file"]["filename"] = LOG_FILE_PATH

    # External section:
    global PLANET_URL
    global PLANET_MD5_URL
    global PLANET_COASTS_URL
    global UGC_URL
    global HOTELS_URL
    global PROMO_CATALOG_CITIES_URL
    global PROMO_CATALOG_COUNTRIES_URL
    global POPULARITY_URL
    global SUBWAY_URL
    global TRANSIT_URL
    global NEED_BUILD_WORLD_ROADS
    global FOOD_URL
    global UK_POSTCODES_URL
    global US_POSTCODES_URL
    global FOOD_TRANSLATIONS_URL
    global SRTM_PATH
    global ISOLINES_PATH
    global ADDRESSES_PATH
    PLANET_URL = cfg.get_opt_path("External", "PLANET_URL", PLANET_URL)
    # The default MD5 URL is derived from the planet URL via md5_ext().
    PLANET_MD5_URL = cfg.get_opt_path("External", "PLANET_MD5_URL", md5_ext(PLANET_URL))
    PLANET_COASTS_URL = cfg.get_opt_path(
        "External", "PLANET_COASTS_URL", PLANET_COASTS_URL
    )
    UGC_URL = cfg.get_opt_path("External", "UGC_URL", UGC_URL)
    HOTELS_URL = cfg.get_opt_path("External", "HOTELS_URL", HOTELS_URL)
    PROMO_CATALOG_CITIES_URL = cfg.get_opt_path(
        "External", "PROMO_CATALOG_CITIES_URL", PROMO_CATALOG_CITIES_URL
    )
    PROMO_CATALOG_COUNTRIES_URL = cfg.get_opt_path(
        "External", "PROMO_CATALOG_COUNTRIES_URL", PROMO_CATALOG_COUNTRIES_URL
    )
    POPULARITY_URL = cfg.get_opt_path("External", "POPULARITY_URL", POPULARITY_URL)
    SUBWAY_URL = cfg.get_opt("External", "SUBWAY_URL", SUBWAY_URL)
    TRANSIT_URL = cfg.get_opt("External", "TRANSIT_URL", TRANSIT_URL)
    # NOTE(review): a configured value stays a string here as well (see
    # NEED_PLANET_UPDATE above) - confirm how callers interpret it.
    NEED_BUILD_WORLD_ROADS = cfg.get_opt("External", "NEED_BUILD_WORLD_ROADS", NEED_BUILD_WORLD_ROADS)
    FOOD_URL = cfg.get_opt("External", "FOOD_URL", FOOD_URL)

    UK_POSTCODES_URL = cfg.get_opt("External", "UK_POSTCODES_URL", UK_POSTCODES_URL)
    US_POSTCODES_URL = cfg.get_opt("External", "US_POSTCODES_URL", US_POSTCODES_URL)
    FOOD_TRANSLATIONS_URL = cfg.get_opt(
        "External", "FOOD_TRANSLATIONS_URL", FOOD_TRANSLATIONS_URL
    )
    SRTM_PATH = cfg.get_opt_path("External", "SRTM_PATH", SRTM_PATH)
    ISOLINES_PATH = cfg.get_opt_path("External", "ISOLINES_PATH", ISOLINES_PATH)
    ADDRESSES_PATH = cfg.get_opt_path("External", "ADDRESSES_PATH", ADDRESSES_PATH)

    # Stats section:
    global STATS_TYPES_CONFIG
    STATS_TYPES_CONFIG = cfg.get_opt_path(
        "Stats", "STATS_TYPES_CONFIG", STATS_TYPES_CONFIG
    )

    # Common:
    global THREADS_COUNT
    threads_count = int(cfg.get_opt("Common", "THREADS_COUNT", THREADS_COUNT))
    # Non-positive values keep the default (the CPU count).
    if threads_count > 0:
        THREADS_COUNT = threads_count

    # Planet and coasts:
    global PLANET_COASTS_GEOM_URL
    global PLANET_COASTS_RAWGEOM_URL
    PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
    PLANET_COASTS_RAWGEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.rawgeom")

    # In debug mode the planet URLs are replaced with fixed ones and the
    # planet update is switched off, regardless of the configuration.
    if DEBUG:
        PLANET_URL = "https://www.dropbox.com/s/m3ru5tnj8g9u4cz/planet-latest.o5m?raw=1"
        PLANET_MD5_URL = (
            "https://www.dropbox.com/s/8wdl2hy22jgisk5/planet-latest.o5m.md5?raw=1"
        )
        NEED_PLANET_UPDATE = False

View file

@ -0,0 +1,380 @@
"""
This file contains decorators that define stages.
There are two main types of stages:
1. outer_stage - a high-level stage.
2. country_stage - a stage that applies to country files (*.mwm).

A country_stage may run inside another stage: mwm_stage runs country stages,
and it is the only stage that contains country stages.
"""
import datetime
import logging
import os
import time
from abc import ABC
from abc import abstractmethod
from collections import defaultdict
from typing import AnyStr
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Type
from typing import Union
import filelock
from maps_generator.generator import status
from maps_generator.generator.exceptions import FailedTest
from maps_generator.utils.file import download_files
from maps_generator.utils.file import normalize_url_to_path_dict
from maps_generator.utils.log import DummyObject
from maps_generator.utils.log import create_file_handler
from maps_generator.utils.log import create_file_logger
logger = logging.getLogger("maps_generator")
class InternalDependency:
    """
    Description of a file that a stage needs to have downloaded.

    url: where the file is downloaded from.
    path_method: object used to obtain the local destination path
        (depends_from_internal resolves it when it is a `property`).
    mode: flags string; depends_from_internal skips dependencies whose mode
        contains "p" when the environment is not a production one.
    """

    def __init__(self, url, path_method, mode=""):
        self.url, self.path_method, self.mode = url, path_method, mode
class Test:
    """
    A check that is attached to a stage (see test_stage).

    test: callable(env, logger, *args, **kwargs); a falsy result or any
        exception means the test has failed.
    need_run: None (always run), a callable(env, logger) deciding whether to
        run, or a plain value used as the decision.
    is_pretest: True if the test must run before the stage, not after it.
    """

    def __init__(self, test, need_run=None, is_pretest=False):
        self._test = test
        self._need_run = need_run
        self.is_pretest = is_pretest

    @property
    def name(self):
        """Name of the wrapped test callable."""
        return self._test.__name__

    def need_run(self, env, _logger):
        """Tell whether the test has to be executed for this environment."""
        decision = self._need_run
        if decision is None:
            return True
        return decision(env, _logger) if callable(decision) else decision

    def test(self, env, _logger, *args, **kwargs):
        """
        Run the test.

        :raises FailedTest: if the test raises an exception (chained as the
            cause) or returns a falsy value.
        """
        try:
            outcome = self._test(env, _logger, *args, **kwargs)
        except Exception as error:
            raise FailedTest(f"Test {self.name} is failed.") from error

        if outcome:
            _logger.info(f"Test {self.name} is successfully completed.")
            return

        raise FailedTest(f"Test {self.name} is failed.")
class Stage(ABC):
    """
    Base class of all stages.

    A stage object stores keyword arguments at construction time; calling the
    object with an environment runs apply(env, **stored_arguments).
    """

    # Flags read by the decorators and the stage machinery.
    need_planet_lock = False
    need_build_lock = False
    is_helper = False
    is_mwm_stage = False
    is_production_only = False

    def __init__(self, **args):
        self.args = args

    def __call__(self, env: "Env"):
        stored_arguments = self.args
        return self.apply(env, **stored_arguments)

    @abstractmethod
    def apply(self, *args, **kwargs):
        """Do the work of the stage; must be implemented by subclasses."""
def get_stage_name(stage: Union[Type[Stage], Stage]) -> AnyStr:
    """
    Return the name of a stage class or stage instance.

    The name is the class name with every occurrence of "Stage" removed,
    e.g. StageCoastline -> "Coastline".
    """
    stage_class = stage.__class__ if isinstance(stage, Stage) else stage
    return stage_class.__name__.replace("Stage", "")
def get_stage_type(stage: Union[Type[Stage], AnyStr]):
    """
    Return the stage class for a stage class or a stage name.

    A string is looked up in the stages_declaration module; the "Stage"
    prefix is added when the string does not start with it. Anything that is
    not a string is returned unchanged.
    """
    # Imported here, not at module level, as in the original code.
    from . import stages_declaration as sd

    if not isinstance(stage, str):
        return stage

    class_name = stage if stage.startswith("Stage") else f"Stage{stage}"
    return getattr(sd, class_name)
class Stages:
    """Stages class is used for storing all stages."""

    def __init__(self):
        self.mwm_stage: Optional[Type[Stage]] = None
        self.countries_stages: List[Type[Stage]] = []
        self.stages: List[Type[Stage]] = []
        self.helper_stages: List[Type[Stage]] = []
        self.dependencies = defaultdict(set)

    def init(self):
        """Resolve stage names stored in the dependencies to stage classes."""
        # We normalize self.dependencies to Dict[Type[Stage], Set[Type[Stage]]].
        normalized = defaultdict(set)
        for stage, deps in self.dependencies.items():
            normalized[get_stage_type(stage)] = {get_stage_type(dep) for dep in deps}
        self.dependencies = normalized

    def set_mwm_stage(self, stage: Type[Stage]):
        """Register the single mwm stage; it may be set only once."""
        assert self.mwm_stage is None
        self.mwm_stage = stage

    def add_helper_stage(self, stage: Type[Stage]):
        self.helper_stages.append(stage)

    def add_country_stage(self, stage: Type[Stage]):
        self.countries_stages.append(stage)

    def add_stage(self, stage: Type[Stage]):
        self.stages.append(stage)

    def add_dependency_for(self, stage: Type[Stage], *deps):
        """Record every item of `deps` as a dependency of `stage`."""
        for dependency in deps:
            self.dependencies[stage].add(dependency)

    def get_invisible_stages_names(self) -> List[AnyStr]:
        """Returns names of the helper stages."""
        return [get_stage_name(helper) for helper in self.helper_stages]

    def get_visible_stages_names(self) -> List[AnyStr]:
        """Returns all stages names except helper stages names."""
        names = []
        for stage in self.stages:
            names.append(get_stage_name(stage))
            # Country stages are listed right after the mwm stage.
            if stage == self.mwm_stage:
                names.extend(get_stage_name(child) for child in self.countries_stages)
        return names

    def is_valid_stage_name(self, stage_name) -> bool:
        """Tell whether `stage_name` is the name of any registered stage."""
        if get_stage_name(self.mwm_stage) == stage_name:
            return True
        for group in (self.countries_stages, self.stages, self.helper_stages):
            for candidate in group:
                if stage_name == get_stage_name(candidate):
                    return True
        return False
# The global `stages` registry; the stage decorators defined in this module
# register every decorated stage class in it.
stages = Stages()
def outer_stage(stage: Type[Stage]) -> Type[Stage]:
    """
    It's decorator that defines high level stage.

    The stage class is registered in the global `stages` registry and its
    apply() is wrapped so that every run:
    - writes to its own "<name>.log" file in env.paths.log_path;
    - logs the start/finish anchor messages (the finish message is logged
      even when the stage returns early or raises);
    - is not executed when the stage is not accepted by the environment or
      when the main status says it has to be skipped.
    """
    # NOTE(review): nesting of the is_mwm_stage check under `else` is
    # reconstructed - confirm against the original file.
    if stage.is_helper:
        stages.add_helper_stage(stage)
    else:
        stages.add_stage(stage)
        if stage.is_mwm_stage:
            stages.set_mwm_stage(stage)

    def new_apply(method):
        def apply(obj: Stage, env: "Env", *args, **kwargs):
            name = get_stage_name(obj)
            logfile = os.path.join(env.paths.log_path, f"{name}.log")
            log_handler = create_file_handler(logfile)
            logger.addHandler(log_handler)
            # This message is used as an anchor for parsing logs.
            # See maps_generator/checks/logs/logs_reader.py STAGE_START_MSG_PATTERN
            logger.info(f"Stage {name}: start ...")
            t = time.time()
            try:
                if not env.is_accepted_stage(stage):
                    logger.info(f"Stage {name} was not accepted.")
                    return

                main_status = env.main_status
                main_status.init(env.paths.main_status_path, name)
                if main_status.need_skip():
                    logger.warning(f"Stage {name} was skipped.")
                    return

                main_status.update_status()
                # Output of subprocesses started by the stage goes to the
                # stream of the stage log handler.
                env.set_subprocess_out(log_handler.stream)
                method(obj, env, *args, **kwargs)
            finally:
                d = time.time() - t
                # This message is used as an anchor for parsing logs.
                # See maps_generator/checks/logs/logs_reader.py STAGE_FINISH_MSG_PATTERN
                logger.info(
                    f"Stage {name}: finished in {str(datetime.timedelta(seconds=d))}"
                )
                # NOTE(review): the handler is detached but not closed here.
                logger.removeHandler(log_handler)

        return apply

    stage.apply = new_apply(stage.apply)
    return stage
def country_stage_status(stage: Type[Stage]) -> Type[Stage]:
    """
    It's helper decorator that works with status file.

    The wrapped apply() does not run the stage when the stage is not accepted
    by the environment or when the per-country status says it has to be
    skipped; otherwise it updates the status and runs the stage.
    """

    def new_apply(method):
        def apply(obj: Stage, env: "Env", country: AnyStr, *args, **kwargs):
            name = get_stage_name(obj)
            # Use the country logger if one has already been created for this
            # country; otherwise messages go to a DummyObject.
            _logger = DummyObject()
            countries_meta = env.countries_meta
            if "logger" in countries_meta[country]:
                _logger, _ = countries_meta[country]["logger"]

            if not env.is_accepted_stage(stage):
                _logger.info(f"Stage {name} was not accepted.")
                return

            # The status object is created once per country and then reused.
            if "status" not in countries_meta[country]:
                countries_meta[country]["status"] = status.Status()

            country_status = countries_meta[country]["status"]
            status_file = os.path.join(
                env.paths.status_path, status.with_stat_ext(country)
            )
            country_status.init(status_file, name)
            if country_status.need_skip():
                _logger.warning(f"Stage {name} was skipped.")
                return

            country_status.update_status()
            method(obj, env, country, *args, **kwargs)

        return apply

    stage.apply = new_apply(stage.apply)
    return stage
def country_stage_log(stage: Type[Stage]) -> Type[Stage]:
    """
    It's helper decorator that works with log file.

    The wrapped apply() logs to the "<country>.log" file in
    env.paths.log_path, passes the country logger to the stage as the
    `logger` keyword argument and logs the start/finish anchor messages.
    The finish message is not logged if the stage raises.
    """

    def new_apply(method):
        def apply(obj: Stage, env: "Env", country: AnyStr, *args, **kwargs):
            name = get_stage_name(obj)
            log_file = os.path.join(env.paths.log_path, f"{country}.log")
            countries_meta = env.countries_meta
            # The (logger, handler) pair is created once per country and then
            # reused by all stages of this country.
            if "logger" not in countries_meta[country]:
                countries_meta[country]["logger"] = create_file_logger(log_file)

            _logger, log_handler = countries_meta[country]["logger"]
            # This message is used as an anchor for parsing logs.
            # See maps_generator/checks/logs/logs_reader.py STAGE_START_MSG_PATTERN
            _logger.info(f"Stage {name}: start ...")
            t = time.time()
            env.set_subprocess_out(log_handler.stream, country)
            method(obj, env, country, *args, logger=_logger, **kwargs)
            d = time.time() - t
            # This message is used as an anchor for parsing logs.
            # See maps_generator/checks/logs/logs_reader.py STAGE_FINISH_MSG_PATTERN
            _logger.info(
                f"Stage {name}: finished in {str(datetime.timedelta(seconds=d))}"
            )

        return apply

    stage.apply = new_apply(stage.apply)
    return stage
def test_stage(*tests: Test) -> Callable[[Type[Stage],], Type[Stage]]:
    """
    It's decorator that attaches tests to a stage.

    Tests with is_pretest set run before the stage, all the others run after
    it. A test whose need_run() is falsy is only reported as skipped. The
    logger passed as the `logger` keyword argument is used when present,
    otherwise the module logger.
    """

    def new_apply(method):
        def apply(obj: Stage, env: "Env", *args, **kwargs):
            _logger = kwargs.get("logger", logger)

            def run_phase(pretest_phase):
                for candidate in tests:
                    if bool(candidate.is_pretest) is not pretest_phase:
                        continue
                    if not candidate.need_run(env, _logger):
                        _logger.info(f"Test {candidate.name} was skipped.")
                        continue
                    candidate.test(env, _logger, *args, **kwargs)

            run_phase(True)
            method(obj, env, *args, **kwargs)
            run_phase(False)

        return apply

    def wrapper(stage: Type[Stage]) -> Type[Stage]:
        stage.apply = new_apply(stage.apply)
        return stage

    return wrapper
def country_stage(stage: Type[Stage]) -> Type[Stage]:
    """
    It's decorator that defines country stage.

    The stage is registered in the global `stages` registry (as a helper
    stage or as a country stage) and its apply() is wrapped with the status
    handling (inner wrapper) and the per-country logging (outer wrapper).
    """
    if stage.is_helper:
        stages.add_helper_stage(stage)
    else:
        stages.add_country_stage(stage)

    return country_stage_log(country_stage_status(stage))
def mwm_stage(stage: Type[Stage]) -> Type[Stage]:
    """It's decorator that marks a stage as the mwm stage (is_mwm_stage)."""
    stage.is_mwm_stage = True
    return stage
def production_only(stage: Type[Stage]) -> Type[Stage]:
    """It's decorator that marks a stage as production only."""
    stage.is_production_only = True
    return stage
def helper_stage_for(*deps) -> Callable[[Type[Stage],], Type[Stage]]:
    """
    It's decorator that marks a stage as a helper stage.

    `deps` (stage classes or stage names) are recorded in the global `stages`
    registry as dependencies of the decorated stage.
    """

    def wrapper(stage: Type[Stage]) -> Type[Stage]:
        stages.add_dependency_for(stage, *deps)
        stage.is_helper = True
        return stage

    return wrapper
def depends_from_internal(*deps) -> Callable[[Type[Stage],], Type[Stage]]:
    """
    It's decorator that downloads internal dependencies before a stage runs.

    `deps` are InternalDependency objects. They are stored on the stage class
    as `internal_dependencies`; the wrapped apply() downloads them (once per
    stage, guarded by a file lock and a ".download" status file) and then
    runs the stage.
    """

    def get_urls(
        env: "Env", internal_dependencies: List[InternalDependency]
    ) -> Dict[AnyStr, AnyStr]:
        """Map the url of every applicable dependency to its local path."""
        url_to_path = {}
        for dependency in internal_dependencies:
            production_only_dependency = "p" in dependency.mode
            # Skip production-only dependencies outside production, and
            # dependencies without a url.
            if (
                production_only_dependency and not env.production
            ) or not dependency.url:
                continue

            destination = None
            if type(dependency.path_method) is property:
                destination = dependency.path_method.__get__(env.paths)

            assert destination is not None, type(dependency.path_method)
            url_to_path[dependency.url] = destination

        return url_to_path

    def download_under_lock(env: "Env", urls: Dict[AnyStr, AnyStr], stage_name: AnyStr):
        """Download `urls` unless the download is already marked finished."""
        lock_name = f"{os.path.join(env.paths.status_path, stage_name)}.lock"
        status_name = f"{os.path.join(env.paths.status_path, stage_name)}.download"
        with filelock.FileLock(lock_name):
            download_status = status.Status(status_name)
            if download_status.is_finished():
                return
            normalized_urls = normalize_url_to_path_dict(urls)
            download_files(normalized_urls, env.force_download_files)
            download_status.finish()

    def new_apply(method):
        def apply(obj: Stage, env: "Env", *args, **kwargs):
            dependencies = getattr(obj, "internal_dependencies", None)
            if dependencies:
                urls = get_urls(env, dependencies)
                if urls:
                    download_under_lock(env, urls, get_stage_name(obj))

            method(obj, env, *args, **kwargs)

        return apply

    def wrapper(stage: Type[Stage]) -> Type[Stage]:
        stage.internal_dependencies = deps
        stage.apply = new_apply(stage.apply)
        return stage

    return wrapper

View file

@ -0,0 +1,446 @@
"""
This file contains the stages that maps_generator can run.
Some algorithms assume that a map generation process looks like:
    stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN
Only stage_mwm can contain country stages.
"""
import datetime
import json
import logging
import multiprocessing
import os
import shutil
import tarfile
import errno
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import AnyStr
from typing import Type
import maps_generator.generator.diffs as diffs
import maps_generator.generator.stages_tests as st
# from descriptions.descriptions_downloader import check_and_get_checker
# from descriptions.descriptions_downloader import download_from_wikidata_tags
# from descriptions.descriptions_downloader import download_from_wikipedia_tags
from maps_generator.generator import coastline
from maps_generator.generator import settings
from maps_generator.generator import steps
from maps_generator.generator.env import Env
from maps_generator.generator.env import PathProvider
from maps_generator.generator.env import WORLD_COASTS_NAME
from maps_generator.generator.env import WORLD_NAME
from maps_generator.generator.exceptions import BadExitStatusError
from maps_generator.generator.gen_tool import run_gen_tool
from maps_generator.generator.stages import InternalDependency as D
from maps_generator.generator.stages import Stage
from maps_generator.generator.stages import Test
from maps_generator.generator.stages import country_stage
from maps_generator.generator.stages import depends_from_internal
from maps_generator.generator.stages import helper_stage_for
from maps_generator.generator.stages import mwm_stage
from maps_generator.generator.stages import outer_stage
from maps_generator.generator.stages import production_only
from maps_generator.generator.stages import test_stage
from maps_generator.generator.statistics import get_stages_info
from maps_generator.utils.file import download_files
from maps_generator.utils.file import is_verified
from post_generation.hierarchy_to_countries import hierarchy_to_countries
from post_generation.inject_promo_ids import inject_promo_ids
logger = logging.getLogger("maps_generator")
def is_accepted(env: Env, stage: Type[Stage]) -> bool:
    """Shortcut for env.is_accepted_stage(stage)."""
    return env.is_accepted_stage(stage)
@outer_stage
class StageDownloadAndConvertPlanet(Stage):
    """Outer stage that runs steps.step_download_and_convert_planet."""

    def apply(self, env: Env, force_download: bool = True, **kwargs):
        # The step runs when a download is forced or when the existing
        # planet o5m file does not pass is_verified().
        if force_download or not is_verified(env.paths.planet_o5m):
            steps.step_download_and_convert_planet(
                env, force_download=force_download, **kwargs
            )
@outer_stage
class StageUpdatePlanet(Stage):
    """Outer stage that runs steps.step_update_planet."""

    def apply(self, env: Env, **kwargs):
        steps.step_update_planet(env, **kwargs)
@outer_stage
class StageCoastline(Stage):
    """Outer stage that builds the coastline, or downloads a prebuilt one."""

    def apply(self, env: Env, use_old_if_fail=True):
        """
        Build the coasts and copy them to the intermediate data directory.

        :param use_old_if_fail: if True and the build fails with
            BadExitStatusError, the coasts are downloaded from
            settings.PLANET_COASTS_GEOM_URL / PLANET_COASTS_RAWGEOM_URL
            instead; if False the error is re-raised.
        """
        coasts_geom = "WorldCoasts.geom"
        coasts_rawgeom = "WorldCoasts.rawgeom"
        coastline_dir = env.paths.coastline_path

        try:
            coastline.make_coastline(env)
        except BadExitStatusError:
            if not use_old_if_fail:
                raise

            logger.warning("Build coasts failed. Try to download the coasts...")
            fallback_urls = {
                settings.PLANET_COASTS_GEOM_URL: os.path.join(
                    coastline_dir, coasts_geom
                ),
                settings.PLANET_COASTS_RAWGEOM_URL: os.path.join(
                    coastline_dir, coasts_rawgeom
                ),
            }
            download_files(fallback_urls)

        for file_name in (coasts_geom, coasts_rawgeom):
            built_file = os.path.join(env.paths.coastline_path, file_name)
            shutil.copy2(built_file, env.paths.intermediate_data_path)
@outer_stage
class StagePreprocess(Stage):
    """Outer stage that runs steps.step_preprocess."""

    def apply(self, env: Env, **kwargs):
        steps.step_preprocess(env, **kwargs)
@outer_stage
@depends_from_internal(
    D(settings.HOTELS_URL, PathProvider.hotels_path, "p"),
    D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"),
    D(settings.POPULARITY_URL, PathProvider.popularity_path, "p"),
    D(settings.FOOD_URL, PathProvider.food_paths, "p"),
    D(settings.FOOD_TRANSLATIONS_URL, PathProvider.food_translations_path, "p"),
)
@test_stage(
    Test(st.make_test_booking_data(max_days=7), lambda e, _: e.production, True)
)
class StageFeatures(Stage):
    """Outer stage that runs steps.step_features with stage-dependent options."""

    def apply(self, env: Env):
        """
        Collect the extra options and run the features step.

        Options are added depending on which stages are accepted and on
        whether the environment is a production one. After the step the
        packed polygons file, if it exists, is copied to the mwm directory.
        """
        extra = {}
        if is_accepted(env, StageDescriptions):
            extra["idToWikidata"] = env.paths.id_to_wikidata_path

        if env.production:
            extra["booking_data"] = env.paths.hotels_path
            extra["promo_catalog_cities"] = env.paths.promo_catalog_cities_path
            extra["popular_places_data"] = env.paths.popularity_path
            extra["brands_data"] = env.paths.food_paths
            extra["brands_translations_data"] = env.paths.food_translations_path

        if is_accepted(env, StageCoastline):
            extra["emit_coasts"] = True

        if is_accepted(env, StageIsolinesInfo):
            extra["isolines_path"] = PathProvider.isolines_path()

        # NOTE(review): treated as unconditional (not part of the branch
        # above) - confirm against the original file.
        extra["addresses_path"] = PathProvider.addresses_path()

        steps.step_features(env, **extra)

        packed_polygons = env.paths.packed_polygons_path
        if os.path.exists(packed_polygons):
            shutil.copy2(packed_polygons, env.paths.mwm_path)
@outer_stage
@helper_stage_for("StageDescriptions")
class StageDownloadDescriptions(Stage):
    """Helper stage that links pre-downloaded descriptions into the build."""

    def apply(self, env: Env):
        """
        Make env.paths.descriptions_path a symlink to the descriptions folder.

        Whatever is currently at the destination (an empty folder, a file or
        a symlink left by a previous run) is removed first.

        :raises OSError: from os.symlink if the link cannot be created.
        """
        # The downloading implementation below is disabled; it is kept for
        # reference (it was stored as a string literal in this method).
        #
        # run_gen_tool(
        #     env.gen_tool,
        #     out=env.get_subprocess_out(),
        #     err=env.get_subprocess_out(),
        #     data_path=env.paths.data_path,
        #     intermediate_data_path=env.paths.intermediate_data_path,
        #     cache_path=env.paths.cache_path,
        #     user_resource_path=env.paths.user_resource_path,
        #     dump_wikipedia_urls=env.paths.wiki_url_path,
        #     idToWikidata=env.paths.id_to_wikidata_path,
        #     threads_count=settings.THREADS_COUNT,
        # )
        #
        # # https://en.wikipedia.org/wiki/Wikipedia:Multilingual_statistics
        # langs = ("en", "de", "fr", "es", "ru", "tr")
        # checker = check_and_get_checker(env.paths.popularity_path)
        # download_from_wikipedia_tags(
        #     env.paths.wiki_url_path, env.paths.descriptions_path, langs, checker
        # )
        # download_from_wikidata_tags(
        #     env.paths.id_to_wikidata_path, env.paths.descriptions_path, langs, checker
        # )

        # The src folder is hardcoded here and must be implemented on the map building machine
        src = "/home/planet/wikipedia/descriptions"
        # The dest folder will generally become build/*/intermediate_data/descriptions
        dest = env.paths.descriptions_path

        # A missing source folder is a big problem, but (as before) it does
        # not stop the stage; it is reported with an accurate message.
        if os.path.isdir(src):
            logger.info(f"Found {src}")
        else:
            logger.error(f"Descriptions source folder is not found: {src}")

        # Empty folder "descriptions" can be already created.
        try:
            if os.path.islink(dest):
                # A symlink (e.g. from a previous run) must be unlinked:
                # shutil.rmtree refuses to work on symbolic links.
                os.remove(dest)
            elif os.path.isdir(dest):
                shutil.rmtree(dest)
            elif os.path.lexists(dest):
                os.remove(dest)
            # Nothing at dest: nothing to remove, and not an error.
        except OSError as e:
            logger.error(f"Can't remove {e.filename}: {e.strerror}")

        os.symlink(src, dest)
@outer_stage
@mwm_stage
class StageMwm(Stage):
    """The mwm stage: runs all country stages for every country in parallel."""

    def apply(self, env: Env):
        """
        Generate mwm files for all countries that have feature data.

        Countries are processed by a thread pool of settings.THREADS_COUNT
        workers. Every failed country is logged; after all countries have
        been processed the first failure is re-raised, so a failed country
        can no longer go unnoticed.
        """
        tmp_mwm_names = env.get_tmp_mwm_names()
        if len(tmp_mwm_names):
            logger.info(f'Number of feature data .mwm.tmp country files to process: {len(tmp_mwm_names)}')
            errors = []
            with ThreadPoolExecutor(settings.THREADS_COUNT) as pool:
                futures = {
                    pool.submit(StageMwm.make_mwm, country, env): country
                    for country in tmp_mwm_names
                }
                # Results must be inspected explicitly: an executor does not
                # report exceptions of tasks whose results are never read.
                for future in as_completed(futures):
                    error = future.exception()
                    if error is not None:
                        logger.error(
                            f'mwm generation for {futures[future]} failed: {error!r}',
                            exc_info=error,
                        )
                        errors.append(error)
            if errors:
                raise errors[0]
        else:
            # TODO: list all countries that were not found?
            logger.warning(f'There are no feature data .mwm.tmp country files to process in {env.paths.intermediate_tmp_path}!')
            logger.warning('Countries requested for generation are not in the supplied planet file?')

    @staticmethod
    def make_mwm(country: AnyStr, env: Env):
        """
        Run all country stages for one country and mark its mwm as finished.

        The world and the world coasts files have their own stage lists; all
        other countries use the common list.
        """
        logger.info(f'Starting mwm generation for {country}')
        world_stages = {
            WORLD_NAME: [
                StageIndex,
                StageCitiesIdsWorld,
                StagePopularityWorld,
                StagePrepareRoutingWorld,
                StageRoutingWorld,
                StageMwmStatistics,
            ],
            WORLD_COASTS_NAME: [StageIndex, StageMwmStatistics],
        }

        mwm_stages = [
            StageIndex,
            StageUgc,
            StageSrtm,
            StageIsolinesInfo,
            StageDescriptions,
            # call after descriptions
            StagePopularity,
            StageRouting,
            StageRoutingTransit,
            StageMwmDiffs,
            StageMwmStatistics,
        ]

        for stage in world_stages.get(country, mwm_stages):
            logger.info(f'{country} mwm stage {stage.__name__}: start...')
            stage(country=country)(env)

        env.finish_mwm(country)
        logger.info(f'Finished mwm generation for {country}')
@country_stage
class StageIndex(Stage):
    """Country stage that builds the index with the step matching the file."""

    def apply(self, env: Env, country, **kwargs):
        # The world and the world coasts files have dedicated steps.
        if country == WORLD_NAME:
            steps.step_index_world(env, country, **kwargs)
            return

        if country == WORLD_COASTS_NAME:
            steps.step_coastline_index(env, country, **kwargs)
            return

        # Regular countries additionally get the postcodes datasets.
        kwargs["uk_postcodes_dataset"] = settings.UK_POSTCODES_URL
        kwargs["us_postcodes_dataset"] = settings.US_POSTCODES_URL
        steps.step_index(env, country, **kwargs)
@country_stage
@production_only
class StageCitiesIdsWorld(Stage):
    """Production-only country stage that runs steps.step_cities_ids_world."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_cities_ids_world(env, country, **kwargs)
@country_stage
@helper_stage_for("StageRoutingWorld")
# ToDo: Are we sure that this stage will be skipped if StageRoutingWorld is skipped?
class StagePrepareRoutingWorld(Stage):
    """Helper country stage that runs steps.step_prepare_routing_world."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_prepare_routing_world(env, country, **kwargs)
@country_stage
class StageRoutingWorld(Stage):
    """Country stage that runs steps.step_routing_world."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_routing_world(env, country, **kwargs)
@country_stage
@depends_from_internal(D(settings.UGC_URL, PathProvider.ugc_path),)
@production_only
class StageUgc(Stage):
    """
    Production-only country stage that runs steps.step_ugc.

    The file from settings.UGC_URL is downloaded before the stage runs.
    """

    def apply(self, env: Env, country, **kwargs):
        steps.step_ugc(env, country, **kwargs)
@country_stage
class StagePopularity(Stage):
    """Country stage that runs steps.step_popularity."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_popularity(env, country, **kwargs)
@country_stage
class StagePopularityWorld(Stage):
    """Country stage that runs steps.step_popularity_world."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_popularity_world(env, country, **kwargs)
@country_stage
class StageSrtm(Stage):
    """Country stage that runs steps.step_srtm."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_srtm(env, country, **kwargs)
@country_stage
class StageIsolinesInfo(Stage):
    """Country stage that runs steps.step_isolines_info."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_isolines_info(env, country, **kwargs)
@country_stage
class StageDescriptions(Stage):
    """Country stage that runs steps.step_description."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_description(env, country, **kwargs)
@country_stage
class StageRouting(Stage):
    """Country stage that runs steps.step_routing."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_routing(env, country, **kwargs)
@country_stage
@depends_from_internal(
    D(settings.SUBWAY_URL, PathProvider.subway_path),
    D(settings.TRANSIT_URL, PathProvider.transit_path_experimental),
)
class StageRoutingTransit(Stage):
    """
    Country stage that runs steps.step_routing_transit.

    The files from settings.SUBWAY_URL and settings.TRANSIT_URL are
    downloaded before the stage runs.
    """

    def apply(self, env: Env, country, **kwargs):
        steps.step_routing_transit(env, country, **kwargs)
@country_stage
class StageMwmDiffs(Stage):
    """Country stage that calculates diffs between mwm versions."""

    def apply(self, env: Env, country, logger, **kwargs):
        """
        Calculate the diffs of "<country>.mwm".

        The new version is taken from env.paths.mwm_path, old versions are
        looked up under settings.DATA_ARCHIVE_DIR; the depth is
        settings.DIFF_VERSION_DEPTH. `logger` is the per-country logger.
        """
        mwm_file_name = f"{country}.mwm"
        data_dir = diffs.DataDir(
            diff_tool=env.diff_tool,
            mwm_name=mwm_file_name,
            new_version_dir=env.paths.mwm_path,
            old_version_root_dir=settings.DATA_ARCHIVE_DIR,
        )
        diffs.mwm_diff_calculation(
            data_dir, logger, depth=settings.DIFF_VERSION_DEPTH
        )
@country_stage
@helper_stage_for("StageStatistics")
class StageMwmStatistics(Stage):
    """Helper country stage that runs steps.step_statistics."""

    def apply(self, env: Env, country, **kwargs):
        steps.step_statistics(env, country, **kwargs)
@outer_stage
@depends_from_internal(
    D(
        settings.PROMO_CATALOG_COUNTRIES_URL,
        PathProvider.promo_catalog_countries_path,
        "p",
    ),
    D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"),
)
class StageCountriesTxt(Stage):
    """Outer stage that writes the countries file."""

    def apply(self, env: Env):
        """
        Build the countries hierarchy and dump it as JSON.

        In production the promo ids are injected into the hierarchy first.
        The result is written to env.paths.counties_txt_path.
        """
        countries = hierarchy_to_countries(
            env.paths.old_to_new_path,
            env.paths.borders_to_osm_path,
            env.paths.countries_synonyms_path,
            env.paths.hierarchy_path,
            env.paths.mwm_path,
            env.paths.mwm_version,
        )
        if env.production:
            inject_promo_ids(
                countries,
                env.paths.promo_catalog_cities_path,
                env.paths.promo_catalog_countries_path,
                env.paths.mwm_path,
                env.paths.types_path,
                env.paths.mwm_path,
            )

        # ensure_ascii=False emits non-ASCII characters as is, so the file
        # encoding must not depend on the locale of the build machine.
        with open(env.paths.counties_txt_path, "w", encoding="utf-8") as f:
            json.dump(countries, f, ensure_ascii=False, indent=1)
@outer_stage
@production_only
class StageLocalAds(Stage):
    """Production-only outer stage that creates and archives local ads data."""

    def apply(self, env: Env):
        # NOTE(review): `create_csv` is not among the names imported at the
        # top of this file, so this call looks like it raises NameError -
        # confirm where it should come from.
        # NOTE(review): the version is read from `env.mwm_version` here while
        # StageCountriesTxt reads `env.paths.mwm_version` - confirm.
        create_csv(
            env.paths.localads_path,
            env.paths.mwm_path,
            env.paths.mwm_path,
            env.mwm_version,
            multiprocessing.cpu_count(),
        )
        # Pack every file of the local ads directory into
        # "<localads_path>.tar.gz", stored under its bare file name.
        with tarfile.open(f"{env.paths.localads_path}.tar.gz", "w:gz") as tar:
            for filename in os.listdir(env.paths.localads_path):
                tar.add(os.path.join(env.paths.localads_path, filename), arcname=filename)
@outer_stage
class StageStatistics(Stage):
    """Outer stage that merges per-country statistics into stats.json."""

    def apply(self, env: Env):
        """
        Write "stats.json" to env.paths.stats_path.

        The file combines the stages info parsed from the logs with the
        per-country "<country>.json" statistics files.
        """
        steps_info = get_stages_info(env.paths.log_path, {"statistics"})
        stats = defaultdict(lambda: defaultdict(dict))
        stats["steps"] = steps_info["steps"]
        for country in env.get_tmp_mwm_names():
            with open(os.path.join(env.paths.stats_path, f"{country}.json")) as f:
                stats["countries"][country] = {
                    "types": json.load(f),
                    "steps": steps_info["countries"][country],
                }

        def default(o):
            # Durations are stored as strings. Any other unsupported object
            # falls through and is serialized as null, as before.
            if isinstance(o, datetime.timedelta):
                return str(o)

        # ensure_ascii=False emits non-ASCII characters as is, so the file
        # encoding must not depend on the locale of the build machine.
        stats_file = os.path.join(env.paths.stats_path, "stats.json")
        with open(stats_file, "w", encoding="utf-8") as f:
            json.dump(
                stats, f, ensure_ascii=False, sort_keys=True, indent=2, default=default
            )
@outer_stage
class StageCleanup(Stage):
    def apply(self, env: Env):
        """Move ``*.mwm.osm2ft`` files out of mwm_path and delete the draft directory."""
        logger.info(
            f"osm2ft files will be moved from {env.paths.mwm_path} "
            f"to {env.paths.osm2ft_path}."
        )
        for x in os.listdir(env.paths.mwm_path):
            p = os.path.join(env.paths.mwm_path, x)
            # Only regular files with the osm2ft suffix are moved; everything
            # else stays in mwm_path.
            if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
                shutil.move(p, os.path.join(env.paths.osm2ft_path, x))
        logger.info(f"{env.paths.draft_path} will be removed.")
        shutil.rmtree(env.paths.draft_path)

View file

@ -0,0 +1,27 @@
import os
from datetime import datetime
import json
from maps_generator.generator import settings
from maps_generator.generator.env import Env
from maps_generator.utils.file import download_file
def make_test_booking_data(max_days):
    """Build a check that the hotels data is less than ``max_days`` days old.

    The returned callable takes ``(env, logger, *args, **kwargs)`` and returns
    None when ``settings.HOTELS_URL`` is not set, otherwise a bool.
    """

    def test_booking_data(env: Env, logger, *args, **kwargs):
        # Nothing to check when no hotels URL is configured.
        if not settings.HOTELS_URL:
            return None
        # meta.json is fetched from the same URL "directory" as the hotels file.
        base_url, _ = settings.HOTELS_URL.rsplit("/", maxsplit=1)
        url = f"{base_url}/meta.json"
        meta_path = os.path.join(env.paths.tmp_dir(), "hotels-meta.json")
        download_file(url, meta_path)
        with open(meta_path) as f:
            meta = json.load(f)
        # The "latest" entry is a timestamp in the form YYYY_MM_DD-HH_MM_SS.
        raw_date = meta["latest"].strip()
        logger.info(f"Booking date is from {raw_date}.")
        dt = datetime.strptime(raw_date, "%Y_%m_%d-%H_%M_%S")
        # Compare whole days between the build date (env.dt) and the data date.
        return (env.dt - dt).days < max_days

    return test_booking_data

View file

@ -0,0 +1,185 @@
import datetime
import json
import logging
import os
import re
from collections import defaultdict
from typing import AnyStr
from typing import Dict
from typing import List
from maps_generator.generator.env import WORLDS_NAMES
from maps_generator.generator.exceptions import ParseError
logger = logging.getLogger("maps_generator")

# Parse entries, written by ./generator/statistics.cpp PrintTypeStats.
# Captured groups: 1 - type name, 2 - features count, 3 - length,
# 4 - area, 5 - count of features with names. The size value is matched
# but not captured.
RE_STAT = re.compile(
    r"([\w:-]+): "
    r"size = +\d+; "
    r"features = +(\d+); "
    r"length = +([0-9.e+-]+) m; "
    r"area = +([0-9.e+-]+) m²; "
    r"w\/names = +(\d+)"
)

# Matches a duration such as "1 day, 2:03:04.000005" or "0:00:07".
# The named groups (days, hours, minutes, seconds, microseconds) are passed
# as keyword arguments to datetime.timedelta by parse_time().
RE_TIME_DELTA = re.compile(
    r"^(?:(?P<days>-?\d+) (days?, )?)?"
    r"((?:(?P<hours>-?\d+):)(?=\d+:\d+))?"
    r"(?:(?P<minutes>-?\d+):)?"
    r"(?P<seconds>-?\d+)"
    r"(?:\.(?P<microseconds>\d{1,6})\d{0,6})?$"
)

# Matches a log line reporting a finished stage.
# Groups: 1 - anything before "Stage", 2 - stage name, 3 - duration text.
RE_FINISH_STAGE = re.compile(r"(.*)Stage (.+): finished in (.+)$")
def read_stat(f):
    """Parse type statistics lines from the iterable ``f``.

    Lines that do not match RE_STAT (explanation header strings) are skipped.
    Returns a list of dicts with the keys "name", "cnt", "len", "area" and
    "names".
    """
    hits = (RE_STAT.match(row) for row in f)
    return [
        {
            "name": hit.group(1),
            "cnt": int(hit.group(2)),
            "len": float(hit.group(3)),
            "area": float(hit.group(4)),
            "names": int(hit.group(5)),
        }
        for hit in hits
        if hit is not None
    ]
def read_config(f):
    """Parse the statistics config from the iterable of lines ``f``.

    Blank lines and lines starting with "#" are ignored. Every other line is
    split on ";" into at most three stripped fields. The first field is
    compiled as a regular expression and the second is lower-cased.
    Returns a list of ``[pattern, kind, ...]`` lists.
    """
    entries = []
    for raw in f:
        text = raw.strip()
        if not text or text.startswith("#"):
            continue
        fields = [part.strip() for part in text.split(";", 2)]
        pattern = re.compile(fields[0])
        kind = fields[1].lower()
        entries.append([pattern, kind, *fields[2:]])
    return entries
def process_stat(config, stats):
    """Aggregate ``stats`` entries according to the ``config`` rules.

    For every rule (pattern, kind, ...) the selected field of each entry whose
    "name" matches the pattern is summed. The field is "len", "area" or
    "names" for the kinds "len", "area" and "cnt_names"; any other kind sums
    "cnt". Returns a dict keyed by ``str(pattern) + kind``.
    """
    field_by_kind = {"len": "len", "area": "area", "cnt_names": "names"}
    totals = {}
    for rule in config:
        regex, kind = rule[0], rule[1]
        field = field_by_kind.get(kind, "cnt")
        total = 0
        # Explicit accumulation keeps float rounding identical to "+=".
        for entry in stats:
            if regex.match(entry["name"]):
                total += entry[field]
        totals[str(regex) + kind] = total
    return totals
def format_res(res, t):
    """Pair the value ``res`` with the unit label for the statistic kind ``t``.

    Returns ``(res, unit)``. Raises ParseError for an unknown kind.
    """
    if t == "len":
        return res, "m"
    if t == "area":
        # NOTE(review): the area unit is an empty string - confirm whether a
        # square-metre label was intended here.
        return res, ""
    if t in ("cnt", "cnt_names"):
        return res, "pc"
    raise ParseError(f"Unknown type {t}.")
def make_stats(config_path, stats_path):
    """Build the statistics rows for one stats file.

    Reads the rules from ``config_path`` and the raw statistics from
    ``stats_path``, aggregates them and returns a list of dicts with the keys
    "type", "quantity" and "unit", one per config rule and in config order.
    """
    with open(config_path) as config_file:
        config = read_config(config_file)
    with open(stats_path) as stats_file:
        totals = process_stat(config, read_stat(stats_file))

    rows = []
    for rule in config:
        quantity, unit = format_res(totals[str(rule[0]) + rule[1]], rule[1])
        rows.append({"type": rule[2], "quantity": quantity, "unit": unit})
    return rows
def parse_time(time_str):
    """Convert a duration string into a ``datetime.timedelta``.

    Returns None when ``time_str`` does not match RE_TIME_DELTA. Groups that
    are absent or empty are left at the timedelta defaults.
    """
    matched = RE_TIME_DELTA.match(time_str)
    if matched is None:
        return None
    fields = {
        unit: int(text) for unit, text in matched.groupdict().items() if text
    }
    return datetime.timedelta(**fields)
def get_stages_info(log_path, ignored_stages=frozenset()):
    """Collect stage durations from the log files in ``log_path``.

    Every line matching RE_FINISH_STAGE contributes one duration (parsed by
    parse_time(), so it may be None for an unparsable value).

    * Files whose name starts with "stage_" fill ``result["stages"][stage]``,
      except for stages listed in ``ignored_stages``, which are dropped.
    * All other files are treated as country logs and fill
      ``result["countries"][country][stage]``, where the country is the file
      name up to the first dot.

    Returns a nested defaultdict.
    """
    result = defaultdict(lambda: defaultdict(dict))
    for file in os.listdir(log_path):
        path = os.path.join(log_path, file)
        is_stage_log = file.startswith("stage_")
        country = file.split(".")[0]
        with open(path) as f:
            for line in f:
                m = RE_FINISH_STAGE.match(line)
                if not m:
                    continue
                stage_name = m.group(2)
                dt = parse_time(m.group(3))
                if is_stage_log:
                    # An ignored stage must be skipped here, not recorded as a
                    # fake "country" named after the stage log file.
                    if stage_name not in ignored_stages:
                        result["stages"][stage_name] = dt
                else:
                    result["countries"][country][stage_name] = dt
    return result
def read_types(path: AnyStr) -> Dict[AnyStr, Dict]:
    """
    Reads and summarizes statistics for all countries, excluding World and
    WorldCoast.

    Returns a dict mapping a type name to ``{"quantity": ..., "unit": ...}``,
    where quantity is the sum over all countries. Countries without a "types"
    entry are logged and skipped.
    """
    with open(path) as stats_file:
        payload = json.load(stats_file)

    summary = {}
    for country, country_data in payload["countries"].items():
        if country in WORLDS_NAMES:
            continue
        try:
            type_rows = country_data["types"]
        except KeyError:
            logger.exception(f"Cannot parse {country_data}")
            continue
        for row in type_rows:
            entry = summary.setdefault(row["type"], {})
            entry["quantity"] = entry.get("quantity", 0.0) + row["quantity"]
            entry["unit"] = row["unit"]
    return summary
def diff(new: Dict[AnyStr, Dict], old: Dict[AnyStr, Dict]) -> List:
    """Compare two type summaries of the same size.

    Returns one tuple per key of ``new``:
    ``(key, old quantity, new quantity, relative change in percent,
    absolute change, unit)``. The relative change is truncated to an int;
    it is 100 when the old quantity is zero and the new one is not, and 0
    when both are zero.
    """
    assert len(new) == len(old)
    rows = []
    for name, fresh in new.items():
        before = old[name]["quantity"]
        after = fresh["quantity"]
        if before != 0.0:
            percent = int(((after - before) / before) * 100)
        elif after != 0.0:
            percent = 100
        else:
            percent = 0
        rows.append((name, before, after, percent, after - before, fresh["unit"]))
    return rows

View file

@ -0,0 +1,53 @@
import os
from typing import AnyStr
from typing import Optional
def with_stat_ext(country: AnyStr):
    """Return the status file name for ``country`` (``<country>.status``)."""
    return "{}.status".format(country)
def without_stat_ext(status: AnyStr):
    """Strip a trailing ``.status`` extension from ``status``.

    Only the extension at the end of the name is removed; a ".status"
    fragment elsewhere in the name is kept. Names without the extension are
    returned unchanged.
    """
    ext = ".status"
    if status.endswith(ext):
        return status[: -len(ext)]
    return status
class Status:
    """Status is used for recovering and continuation maps generation.

    The status file at ``stat_path`` stores the name of the step that was
    last started ("finish" once everything is done). ``stat_next`` is the
    step about to run. ``find`` becomes True once the saved step has been
    reached; from then on nothing is skipped.
    """

    def __init__(
        self, stat_path: Optional[AnyStr] = None, stat_next: Optional[AnyStr] = None
    ):
        self.stat_path = stat_path
        self.stat_next = stat_next
        self.stat_saved = None
        self.find = False

    def init(self, stat_path: AnyStr, stat_next: AnyStr):
        """Point the object at a status file and the step that is about to run."""
        self.stat_path = stat_path
        self.stat_next = stat_next
        self.stat_saved = self.status()
        if not self.find:
            # The resume point is reached when nothing was saved or the saved
            # step is the one about to run.
            self.find = not self.stat_saved or not self.need_skip()

    def need_skip(self) -> bool:
        """Return True when the next step precedes the saved resume point."""
        if self.find:
            return False
        # bool() keeps the declared return type even if nothing was saved.
        return bool(self.stat_saved) and self.stat_next != self.stat_saved

    def update_status(self):
        """Write the next step name to the status file."""
        with open(self.stat_path, "w") as status:
            status.write(self.stat_next)

    def finish(self):
        """Mark the status file as finished."""
        with open(self.stat_path, "w") as status:
            status.write("finish")

    def is_finished(self):
        """Return True when the status file contains "finish"."""
        return self.status() == "finish"

    def status(self):
        """Return the status file content, or None if it cannot be read."""
        # A default-constructed object has no path yet; treat it like a
        # missing file instead of failing inside open().
        if self.stat_path is None:
            return None
        try:
            with open(self.stat_path) as status:
                return status.read()
        except OSError:
            return None

View file

@ -0,0 +1,453 @@
"""
This file contains basic api for generator_tool and osm tools to generate maps.
"""
import functools
import json
import logging
import os
import shutil
import subprocess
from typing import AnyStr
from maps_generator.generator import settings
from maps_generator.generator.env import Env
from maps_generator.generator.env import PathProvider
from maps_generator.generator.env import WORLDS_NAMES
from maps_generator.generator.env import WORLD_NAME
from maps_generator.generator.env import get_all_countries_list
from maps_generator.generator.exceptions import ValidationError
from maps_generator.generator.exceptions import wait_and_raise_if_fail
from maps_generator.generator.gen_tool import run_gen_tool
from maps_generator.generator.osmtools import osmconvert
from maps_generator.generator.osmtools import osmfilter
from maps_generator.generator.osmtools import osmupdate
from maps_generator.generator.statistics import make_stats
from maps_generator.utils.file import download_files
from maps_generator.utils.file import is_verified
from maps_generator.utils.file import make_symlink
from maps_generator.utils.md5 import md5_ext
from maps_generator.utils.md5 import write_md5sum
# Module-level logger registered under the "maps_generator" name.
logger = logging.getLogger("maps_generator")
def multithread_run_if_one_country(func):
    """Decorate a ``(env, country, **kwargs)`` step to choose its thread count.

    When exactly one country is being generated, ``threads_count`` is set to
    ``settings.THREADS_COUNT``. Otherwise only the Taiwan_* mwms get a fixed
    ``threads_count``; all other calls pass kwargs through unchanged.
    """
    # Otherwise index stage of Taiwan_* mwms continues to run after all other mwms have finished:
    taiwan_threads = {"Taiwan_North": 6, "Taiwan_South": 2}

    @functools.wraps(func)
    def wrap(env, country, **kwargs):
        if len(env.countries) == 1:
            kwargs["threads_count"] = settings.THREADS_COUNT
        elif country in taiwan_threads:
            kwargs["threads_count"] = taiwan_threads[country]
        func(env, country, **kwargs)

    return wrap
def convert_planet(
    tool: AnyStr,
    in_planet: AnyStr,
    out_planet: AnyStr,
    output=subprocess.DEVNULL,
    error=subprocess.DEVNULL,
):
    """Convert ``in_planet`` to ``out_planet`` with osmconvert and write its md5 file.

    ``output`` and ``error`` are passed to osmconvert() as the subprocess
    output targets (discarded by default).
    """
    osmconvert(tool, in_planet, out_planet, output=output, error=error)
    # The checksum is stored at the path returned by md5_ext(out_planet).
    write_md5sum(out_planet, md5_ext(out_planet))
def step_download_and_convert_planet(env: Env, force_download: bool, **kwargs):
    """Obtain the planet as .o5m: symlink a local dump, or download and convert.

    Raises ValidationError when the pbf fails md5 verification after download.
    ``kwargs`` is accepted but not used here.
    """
    # Do not copy, convert, check a local .o5m planet dump, just symlink it instead.
    src = settings.PLANET_URL
    if src.startswith("file://") and src.endswith(".o5m"):
        # src[7:] drops the "file://" prefix (7 characters).
        os.symlink(src[7:], env.paths.planet_o5m)
        return
    if force_download or not is_verified(env.paths.planet_osm_pbf):
        download_files(
            {
                settings.PLANET_URL: env.paths.planet_osm_pbf,
                settings.PLANET_MD5_URL: md5_ext(env.paths.planet_osm_pbf),
            },
            env.force_download_files,
        )
        if not is_verified(env.paths.planet_osm_pbf):
            raise ValidationError(f"Wrong md5 sum for {env.paths.planet_osm_pbf}.")
    convert_planet(
        env[settings.OSM_TOOL_CONVERT],
        env.paths.planet_osm_pbf,
        env.paths.planet_o5m,
        output=env.get_subprocess_out(),
        error=env.get_subprocess_out(),
    )
    # The pbf and its md5 file are removed once the o5m has been produced.
    os.remove(env.paths.planet_osm_pbf)
    os.remove(md5_ext(env.paths.planet_osm_pbf))
def step_update_planet(env: Env, **kwargs):
    """Update the planet .o5m with osmupdate and rewrite its md5 file.

    Extra kwargs are forwarded to osmupdate().
    """
    # The update is written to a temporary file that then replaces the original.
    tmp = f"{env.paths.planet_o5m}.tmp"
    osmupdate(
        env[settings.OSM_TOOL_UPDATE],
        env.paths.planet_o5m,
        tmp,
        output=env.get_subprocess_out(),
        error=env.get_subprocess_out(),
        **kwargs,
    )
    os.remove(env.paths.planet_o5m)
    os.rename(tmp, env.paths.planet_o5m)
    write_md5sum(env.paths.planet_o5m, md5_ext(env.paths.planet_o5m))
def step_preprocess(env: Env, **kwargs):
    """Call run_gen_tool() on the planet .o5m with ``preprocess=True``.

    Extra kwargs are forwarded to run_gen_tool().
    """
    run_gen_tool(
        env.gen_tool,
        out=env.get_subprocess_out(),
        err=env.get_subprocess_out(),
        data_path=env.paths.data_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        cache_path=env.paths.cache_path,
        osm_file_type="o5m",
        osm_file_name=env.paths.planet_o5m,
        node_storage=env.node_storage,
        user_resource_path=env.paths.user_resource_path,
        preprocess=True,
        **kwargs,
    )
def step_features(env: Env, **kwargs):
    """Call run_gen_tool() on the planet .o5m with ``generate_features=True``.

    Additional flags are derived from ``env.countries`` before the call;
    extra kwargs are forwarded to run_gen_tool().
    """
    # At least one regular (non-world) country is requested.
    if any(x not in WORLDS_NAMES for x in env.countries):
        kwargs.update({"generate_packed_borders": True})
    # The World mwm itself is requested.
    if any(x == WORLD_NAME for x in env.countries):
        kwargs.update({"generate_world": True})
    # The number of requested countries equals the full countries list.
    if len(env.countries) == len(get_all_countries_list(PathProvider.borders_path())):
        kwargs.update({"have_borders_for_whole_world": True})
    run_gen_tool(
        env.gen_tool,
        out=env.get_subprocess_out(),
        err=env.get_subprocess_out(),
        data_path=env.paths.data_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        cache_path=env.paths.cache_path,
        osm_file_type="o5m",
        osm_file_name=env.paths.planet_o5m,
        node_storage=env.node_storage,
        user_resource_path=env.paths.user_resource_path,
        cities_boundaries_data=env.paths.cities_boundaries_path,
        generate_features=True,
        threads_count=settings.THREADS_COUNT_FEATURES_STAGE,
        **kwargs,
    )
def run_gen_tool_with_recovery_country(env: Env, *args, **kwargs):
    """Run run_gen_tool() on a draft copy of the country's mwm.

    The mwm named by ``kwargs["output"]`` is copied from
    ``kwargs["data_path"]`` into ``env.paths.draft_path`` (its .osm2ft file is
    symlinked), the tool runs against the draft directory, and the resulting
    mwm is moved back. If the tool fails, the original mwm in ``data_path``
    is therefore left untouched.

    When ``data_path`` or ``output`` is missing from kwargs the tool is run
    directly, without this protection.
    """
    if "data_path" not in kwargs or "output" not in kwargs:
        logger.warning("The call run_gen_tool() will be without recovery.")
        run_gen_tool(*args, **kwargs)
        # Nothing can be staged without both keys; stop here instead of
        # falling through to the lookups below.
        return

    prev_data_path = kwargs["data_path"]
    mwm = f"{kwargs['output']}.mwm"
    osm2ft = f"{mwm}.osm2ft"
    # Redirect the tool to the draft directory for this run.
    kwargs["data_path"] = env.paths.draft_path
    make_symlink(
        os.path.join(prev_data_path, osm2ft), os.path.join(env.paths.draft_path, osm2ft)
    )
    shutil.copy(
        os.path.join(prev_data_path, mwm), os.path.join(env.paths.draft_path, mwm)
    )
    run_gen_tool(*args, **kwargs)
    # Reached only if the tool call did not raise: publish the updated mwm.
    shutil.move(
        os.path.join(env.paths.draft_path, mwm), os.path.join(prev_data_path, mwm)
    )
@multithread_run_if_one_country
def _generate_common_index(env: Env, country: AnyStr, **kwargs):
    """Call run_gen_tool() for ``country`` with geometry and index generation.

    The decorator may add ``threads_count`` to kwargs; extra kwargs are
    forwarded to run_gen_tool().
    """
    run_gen_tool(
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        cache_path=env.paths.cache_path,
        user_resource_path=env.paths.user_resource_path,
        node_storage=env.node_storage,
        planet_version=env.planet_version,
        generate_geometry=True,
        generate_index=True,
        output=country,
        **kwargs,
    )
def step_index_world(env: Env, country: AnyStr, **kwargs):
    """Run the common index step with search index and cities boundaries enabled."""
    _generate_common_index(
        env,
        country,
        generate_search_index=True,
        cities_boundaries_data=env.paths.cities_boundaries_path,
        generate_cities_boundaries=True,
        **kwargs,
    )
def step_cities_ids_world(env: Env, country: AnyStr, **kwargs):
    """Run the generator tool with ``generate_cities_ids=True`` via the recovery wrapper."""
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        user_resource_path=env.paths.user_resource_path,
        output=country,
        generate_cities_ids=True,
        **kwargs,
    )
def filter_roads(
    name_executable,
    in_file,
    out_file,
    output=subprocess.DEVNULL,
    error=subprocess.DEVNULL,
):
    """Call osmfilter() keeping only motorway..tertiary highway ways.

    ``output`` and ``error`` are passed through to osmfilter() (discarded by
    default).
    """
    osmfilter(
        name_executable,
        in_file,
        out_file,
        output=output,
        error=error,
        keep="",
        keep_ways="highway=motorway =trunk =primary =secondary =tertiary",
    )
def make_world_road_graph(
    name_executable,
    path_roads_file,
    path_resources,
    path_res_file,
    logger,
    output=subprocess.DEVNULL,
    error=subprocess.DEVNULL,
):
    """Launch the world roads builder tool and wait for it to finish.

    The command line is logged through ``logger`` before the process starts.
    ``output`` and ``error`` become the process stdout and stderr.
    wait_and_raise_if_fail() is used to wait for the process.
    """
    options = (
        ("path_roads_file", path_roads_file),
        ("path_resources", path_resources),
        ("path_res_file", path_res_file),
    )
    command = [name_executable]
    command.extend(f"--{flag}={value}" for flag, value in options)
    logger.info(f"Starting {' '.join(command)}")
    process = subprocess.Popen(
        command, stdout=output, stderr=error, env=os.environ
    )
    wait_and_raise_if_fail(process)
def step_prepare_routing_world(env: Env, country: AnyStr, logger, **kwargs):
    """Filter main roads out of the planet and build the world road graph.

    ``kwargs`` is accepted but not used here.
    """
    # Step 1: write the filtered roads to env.paths.world_roads_o5m.
    filter_roads(
        env[settings.OSM_TOOL_FILTER],
        env.paths.planet_o5m,
        env.paths.world_roads_o5m,
        env.get_subprocess_out(country),
        env.get_subprocess_out(country),
    )
    # Step 2: run the builder tool on that file; the result goes to
    # env.paths.world_roads_path.
    make_world_road_graph(
        env.world_roads_builder_tool,
        env.paths.world_roads_o5m,
        env.paths.user_resource_path,
        env.paths.world_roads_path,
        logger,
        env.get_subprocess_out(country),
        env.get_subprocess_out(country)
    )
def step_routing_world(env: Env, country: AnyStr, **kwargs):
    """Run the generator tool with ``world_roads_path`` set, via the recovery wrapper."""
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        user_resource_path=env.paths.user_resource_path,
        output=country,
        world_roads_path=env.paths.world_roads_path,
        **kwargs,
    )
def step_index(env: Env, country: AnyStr, **kwargs):
    """Run the common index step with the search index enabled."""
    _generate_common_index(env, country, generate_search_index=True, **kwargs)
def step_coastline_index(env: Env, country: AnyStr, **kwargs):
    """Run the common index step without any additional flags."""
    _generate_common_index(env, country, **kwargs)
def step_ugc(env: Env, country: AnyStr, **kwargs):
    """Run the generator tool with ``ugc_data`` set, via the recovery wrapper."""
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        cache_path=env.paths.cache_path,
        user_resource_path=env.paths.user_resource_path,
        ugc_data=env.paths.ugc_path,
        output=country,
        **kwargs,
    )
def step_popularity(env: Env, country: AnyStr, **kwargs):
    """Run the generator tool with ``generate_popular_places=True`` via the recovery wrapper."""
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        user_resource_path=env.paths.user_resource_path,
        generate_popular_places=True,
        output=country,
        **kwargs,
    )
def step_popularity_world(env: Env, country: AnyStr, **kwargs):
    """Like step_popularity(), additionally passing the wikipedia and wikidata inputs."""
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        user_resource_path=env.paths.user_resource_path,
        wikipedia_pages=env.paths.descriptions_path,
        idToWikidata=env.paths.id_to_wikidata_path,
        generate_popular_places=True,
        output=country,
        **kwargs,
    )
def step_srtm(env: Env, country: AnyStr, **kwargs):
    """Run the generator tool with ``srtm_path`` set, via the recovery wrapper."""
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        cache_path=env.paths.cache_path,
        user_resource_path=env.paths.user_resource_path,
        srtm_path=env.paths.srtm_path(),
        output=country,
        **kwargs,
    )
def step_isolines_info(env: Env, country: AnyStr, **kwargs):
    """Run the generator tool with ``generate_isolines_info=True`` via the recovery wrapper."""
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        cache_path=env.paths.cache_path,
        user_resource_path=env.paths.user_resource_path,
        generate_isolines_info=True,
        # The isolines location is taken from PathProvider, not from env.paths.
        isolines_path=PathProvider.isolines_path(),
        output=country,
        **kwargs,
    )
def step_description(env: Env, country: AnyStr, **kwargs):
    """Run the generator tool with the wikipedia and wikidata inputs, via the recovery wrapper."""
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        user_resource_path=env.paths.user_resource_path,
        wikipedia_pages=env.paths.descriptions_path,
        idToWikidata=env.paths.id_to_wikidata_path,
        output=country,
        **kwargs,
    )
def step_routing(env: Env, country: AnyStr, **kwargs):
    """Run the generator tool with the routing-related flags, via the recovery wrapper.

    Enables maxspeed, city roads, cross-mwm, cameras and routing index
    generation; ``generate_traffic_keys`` is explicitly disabled.
    """
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        cache_path=env.paths.cache_path,
        user_resource_path=env.paths.user_resource_path,
        cities_boundaries_data=env.paths.cities_boundaries_path,
        generate_maxspeed=True,
        make_city_roads=True,
        make_cross_mwm=True,
        generate_cameras=True,
        make_routing_index=True,
        generate_traffic_keys=False,
        output=country,
        **kwargs,
    )
def step_routing_transit(env: Env, country: AnyStr, **kwargs):
    """Run the generator tool with the transit cross-mwm flags, via the recovery wrapper."""
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        cache_path=env.paths.cache_path,
        user_resource_path=env.paths.user_resource_path,
        transit_path=env.paths.transit_path,
        transit_path_experimental=env.paths.transit_path_experimental,
        make_transit_cross_mwm=True,
        # The experimental flag is on only when an experimental transit path is set.
        make_transit_cross_mwm_experimental=bool(env.paths.transit_path_experimental),
        output=country,
        **kwargs,
    )
def step_statistics(env: Env, country: AnyStr, **kwargs):
    """Produce type statistics for ``country`` and save them as JSON.

    Runs the generator tool with ``stats_types=True`` via the recovery
    wrapper, then converts ``<country>.stats`` from the intermediate data
    directory into ``<country>.json`` in ``env.paths.stats_path``.
    """
    run_gen_tool_with_recovery_country(
        env,
        env.gen_tool,
        out=env.get_subprocess_out(country),
        err=env.get_subprocess_out(country),
        data_path=env.paths.mwm_path,
        intermediate_data_path=env.paths.intermediate_data_path,
        cache_path=env.paths.cache_path,
        user_resource_path=env.paths.user_resource_path,
        stats_types=True,
        output=country,
        **kwargs,
    )
    # The aggregation rules come from settings.STATS_TYPES_CONFIG.
    with open(os.path.join(env.paths.stats_path, f"{country}.json"), "w") as f:
        json.dump(
            make_stats(
                settings.STATS_TYPES_CONFIG,
                os.path.join(env.paths.intermediate_data_path, f"{country}.stats"),
            ),
            f,
        )