Repo created
parent 4af19165ec
commit 68073add76
12458 changed files with 12350765 additions and 2 deletions
2 tools/python/.gitignore vendored Normal file
@@ -0,0 +1,2 @@
osm_cache.json
transit/18*/
57 tools/python/InstrumentsTraceParser.py Executable file
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import print_function
|
||||
import struct
|
||||
import sys
|
||||
import numpy
|
||||
|
||||
|
||||
class Analyzer:
|
||||
"""
|
||||
The binary format is
|
||||
time since the beginning of the measurement : double
|
||||
unknown and irrelevant field : double
|
||||
momentary consumption calculated for the current time segment : double
|
||||
"""
|
||||
def __init__(self):
|
||||
self.duration = 0.0
|
||||
self.consumption = []
|
||||
self.mean = 0.0
|
||||
self.std = 0.0
|
||||
self.avg = 0.0
|
||||
self.averages = []
|
||||
|
||||
|
||||
def read_file(self, file_path):
|
||||
binary = bytearray()
|
||||
with open(file_path, "rb") as f:  # the trace is binary, so read it as bytes
|
||||
binary = bytearray(f.read())
|
||||
|
||||
for i in range(0, len(binary) - 24, 24):
|
||||
res = struct.unpack(">ddd", binary[i:i+24])
|
||||
|
||||
current_duration = res[0]
|
||||
if not current_duration > self.duration:
|
||||
print("Unexpected elapsed time value, lower than the previous one.")
|
||||
sys.exit(2) # this should never happen because the file is written sequentially
|
||||
|
||||
current_consumption = res[2]
|
||||
self.averages.append(current_consumption / (current_duration - self.duration))
|
||||
self.duration = current_duration
|
||||
|
||||
self.consumption.append(current_consumption)
|
||||
|
||||
self.calculate_stats()
|
||||
|
||||
|
||||
def calculate_stats(self):
|
||||
self.mean = numpy.mean(self.averages)
|
||||
self.std = numpy.std(self.averages)
|
||||
self.avg = sum(self.consumption) / self.duration
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
for file_path in sys.argv[1:]:
|
||||
analyzer = Analyzer()
|
||||
analyzer.read_file(file_path)
|
||||
print("{}\n\tavg: {}\n\tmean: {}\n\tstd: {}".format(file_path, analyzer.avg, analyzer.mean, analyzer.std))
|
||||
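The parser above consumes 24-byte records of three big-endian doubles, as described in the Analyzer docstring. A minimal sketch of how such a trace can be produced and parsed for a quick check; the file name and sample values are synthetic, and the import assumes InstrumentsTraceParser.py is on the Python path:

```python
#!/usr/bin/env python3
# Sketch: write a few synthetic 24-byte records and run Analyzer over them.
# "synthetic_trace.dat" and the sample values are made up for illustration.
import struct

from InstrumentsTraceParser import Analyzer

SAMPLES = [
    # (elapsed time, unused field, consumption for the segment)
    (0.5, 0.0, 10.0),
    (1.0, 0.0, 12.0),
    (1.5, 0.0, 11.0),
]

with open("synthetic_trace.dat", "wb") as f:
    for record in SAMPLES:
        f.write(struct.pack(">ddd", *record))  # three big-endian doubles

analyzer = Analyzer()
# Note: read_file() iterates range(0, len(binary) - 24, 24), so the final
# record of the file is not consumed.
analyzer.read_file("synthetic_trace.dat")
print(analyzer.avg, analyzer.mean, analyzer.std)
```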
14 tools/python/Util.py Normal file
@@ -0,0 +1,14 @@
from contextlib import contextmanager
import shutil
import tempfile

try:
    from tempfile import TemporaryDirectory
except ImportError:
    @contextmanager
    def TemporaryDirectory():
        name = tempfile.mkdtemp()
        try:
            yield name
        finally:
            shutil.rmtree(name)
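A short usage sketch for the fallback above; it behaves the same whether the stdlib TemporaryDirectory or the contextmanager-based replacement is picked. The import assumes tools/python is on sys.path:

```python
# Sketch: the directory and everything inside it is removed when the block exits.
import os

from Util import TemporaryDirectory

with TemporaryDirectory() as tmp_dir:
    path = os.path.join(tmp_dir, "scratch.txt")
    with open(path, "w") as f:
        f.write("temporary data")
    print(os.path.exists(path))  # True inside the block

print(os.path.exists(path))  # False: the directory was cleaned up on exit
```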
0 tools/python/__init__.py Normal file
16 tools/python/airmaps/README.md Normal file
@@ -0,0 +1,16 @@
# airmaps - building maps with Airflow

## Storage

A repository for the resulting and temporary files.
Currently, the storage is a WebDAV server.

## Description of DAGs:

1. Update_planet - updates the .o5m planet file.

2. Build_coastline - builds the coastline files.

3. Generate_open_source_maps - builds free maps for maps.me.

All results will be published to the storage.
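For reference, a hedged sketch of how the published artifacts could be inspected on that WebDAV storage, using the same webdavclient API the airmaps instruments use; the hostname, credentials and remote path below are placeholders:

```python
# Sketch only: list what the DAGs have published to the WebDAV storage.
import webdav.client as wc

options = {
    "webdav_hostname": "https://storage.example.com",  # placeholder
    "webdav_login": "user",                             # placeholder
    "webdav_password": "password",                      # placeholder
}

client = wc.Client(options)
# e.g. coastline files published by the Build_coastline DAG
for entry in client.list("/coasts"):
    print(entry)
```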
12 tools/python/airmaps/__init__.py Normal file
@@ -0,0 +1,12 @@
import os
|
||||
|
||||
from airmaps.instruments import settings
|
||||
|
||||
CONFIG_PATH = os.path.join(
|
||||
os.path.dirname(os.path.join(os.path.realpath(__file__))),
|
||||
"var",
|
||||
"etc",
|
||||
"airmaps.ini",
|
||||
)
|
||||
|
||||
settings.init(CONFIG_PATH)
|
||||
93 tools/python/airmaps/dags/build_coastline.py Normal file
@@ -0,0 +1,93 @@
import logging
|
||||
import os
|
||||
import shutil
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
|
||||
from airmaps.instruments import settings
|
||||
from airmaps.instruments import storage
|
||||
from airmaps.instruments.utils import get_latest_filename
|
||||
from airmaps.instruments.utils import make_rm_build_task
|
||||
from airmaps.instruments.utils import put_current_date_in_filename
|
||||
from airmaps.instruments.utils import rm_build
|
||||
from maps_generator.generator import stages_declaration as sd
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.generator.env import WORLD_COASTS_NAME
|
||||
from maps_generator.maps_generator import run_generation
|
||||
|
||||
logger = logging.getLogger("airmaps")
|
||||
|
||||
|
||||
DAG = DAG(
|
||||
"Build_coastline",
|
||||
schedule_interval=timedelta(days=1),
|
||||
default_args={
|
||||
"owner": "OMaps",
|
||||
"depends_on_past": True,
|
||||
"start_date": days_ago(0),
|
||||
"email": settings.EMAILS,
|
||||
"email_on_failure": True,
|
||||
"email_on_retry": False,
|
||||
"retries": 0,
|
||||
"retry_delay": timedelta(minutes=5),
|
||||
"priority_weight": 1,
|
||||
},
|
||||
)
|
||||
|
||||
COASTLINE_STORAGE_PATH = f"{settings.STORAGE_PREFIX}/coasts"
|
||||
|
||||
|
||||
def publish_coastline(**kwargs):
|
||||
build_name = kwargs["ti"].xcom_pull(key="build_name")
|
||||
env = Env(build_name=build_name)
|
||||
for name in (f"{WORLD_COASTS_NAME}.geom", f"{WORLD_COASTS_NAME}.rawgeom"):
|
||||
coastline = put_current_date_in_filename(name)
|
||||
latest = get_latest_filename(name)
|
||||
coastline_full = os.path.join(env.paths.coastline_path, coastline)
|
||||
latest_full = os.path.join(env.paths.coastline_path, latest)
|
||||
shutil.move(os.path.join(env.paths.coastline_path, name), coastline_full)
|
||||
os.symlink(coastline, latest_full)
|
||||
|
||||
storage.wd_publish(coastline_full, f"{COASTLINE_STORAGE_PATH}/{coastline}")
|
||||
storage.wd_publish(latest_full, f"{COASTLINE_STORAGE_PATH}/{latest}")
|
||||
|
||||
|
||||
def build_coastline(**kwargs):
|
||||
env = Env()
|
||||
kwargs["ti"].xcom_push(key="build_name", value=env.build_name)
|
||||
|
||||
run_generation(
|
||||
env,
|
||||
(
|
||||
sd.StageDownloadAndConvertPlanet(),
|
||||
sd.StageCoastline(use_old_if_fail=False),
|
||||
sd.StageCleanup(),
|
||||
),
|
||||
)
|
||||
env.finish()
|
||||
|
||||
|
||||
BUILD_COASTLINE_TASK = PythonOperator(
|
||||
task_id="Build_coastline_task",
|
||||
provide_context=True,
|
||||
python_callable=build_coastline,
|
||||
on_failure_callback=lambda ctx: rm_build(**ctx),
|
||||
dag=DAG,
|
||||
)
|
||||
|
||||
|
||||
PUBLISH_COASTLINE_TASK = PythonOperator(
|
||||
task_id="Publish_coastline_task",
|
||||
provide_context=True,
|
||||
python_callable=publish_coastline,
|
||||
dag=DAG,
|
||||
)
|
||||
|
||||
|
||||
RM_BUILD_TASK = make_rm_build_task(DAG)
|
||||
|
||||
|
||||
BUILD_COASTLINE_TASK >> PUBLISH_COASTLINE_TASK >> RM_BUILD_TASK
|
||||
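The two tasks above are connected only through XCom: the build task pushes the build name and the publish and cleanup tasks pull it to reconstruct the same Env. A stubbed sketch of that hand-off, runnable without Airflow; FakeTaskInstance and the build name are illustrative only:

```python
# Sketch: how build_coastline() and publish_coastline() share the build name.
class FakeTaskInstance:
    """Stand-in for the Airflow TaskInstance passed as kwargs["ti"]."""

    def __init__(self):
        self._xcom = {}

    def xcom_push(self, key, value):
        self._xcom[key] = value

    def xcom_pull(self, key):
        return self._xcom[key]


def build(ti):
    build_name = "coasts__2020_05_04"  # Env would normally generate this
    ti.xcom_push(key="build_name", value=build_name)


def publish(ti):
    build_name = ti.xcom_pull(key="build_name")
    print(f"publishing artifacts of build {build_name}")


ti = FakeTaskInstance()
build(ti)
publish(ti)
```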
154 tools/python/airmaps/dags/build_maps.py Normal file
@@ -0,0 +1,154 @@
import logging
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
|
||||
from airmaps.instruments import settings
|
||||
from airmaps.instruments import storage
|
||||
from airmaps.instruments.utils import make_rm_build_task
|
||||
from airmaps.instruments.utils import run_generation_from_first_stage
|
||||
from maps_generator.generator import stages_declaration as sd
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.generator.env import PathProvider
|
||||
from maps_generator.generator.env import get_all_countries_list
|
||||
from maps_generator.maps_generator import run_generation
|
||||
|
||||
logger = logging.getLogger("airmaps")
|
||||
|
||||
|
||||
MAPS_STORAGE_PATH = f"{settings.STORAGE_PREFIX}/maps"
|
||||
|
||||
|
||||
class MapsGenerationDAG(DAG):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
build_prolog_task = PythonOperator(
|
||||
task_id="Build_prolog_task",
|
||||
provide_context=True,
|
||||
python_callable=MapsGenerationDAG.build_prolog,
|
||||
dag=self,
|
||||
)
|
||||
|
||||
build_epilog_task = PythonOperator(
|
||||
task_id="Build_epilog_task",
|
||||
provide_context=True,
|
||||
python_callable=MapsGenerationDAG.build_epilog,
|
||||
dag=self,
|
||||
)
|
||||
|
||||
publish_maps_task = PythonOperator(
|
||||
task_id="Publish_maps_task",
|
||||
provide_context=True,
|
||||
python_callable=MapsGenerationDAG.publish_maps,
|
||||
dag=self,
|
||||
)
|
||||
|
||||
rm_build_task = make_rm_build_task(self)
|
||||
|
||||
build_epilog_task >> publish_maps_task >> rm_build_task
|
||||
for country in get_all_countries_list(PathProvider.borders_path()):
|
||||
build_prolog_task >> self.make_mwm_operator(country) >> build_epilog_task
|
||||
|
||||
@staticmethod
|
||||
def get_params(namespace="env", **kwargs):
|
||||
return kwargs.get("params", {}).get(namespace, {})
|
||||
|
||||
@staticmethod
|
||||
def build_prolog(**kwargs):
|
||||
params = MapsGenerationDAG.get_params(**kwargs)
|
||||
env = Env(**params)
|
||||
kwargs["ti"].xcom_push(key="build_name", value=env.build_name)
|
||||
run_generation(
|
||||
env,
|
||||
(
|
||||
sd.StageDownloadAndConvertPlanet(),
|
||||
sd.StageCoastline(),
|
||||
sd.StagePreprocess(),
|
||||
sd.StageFeatures(),
|
||||
sd.StageDownloadDescriptions(),
|
||||
),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def make_build_mwm_func(country):
|
||||
def build_mwm(**kwargs):
|
||||
build_name = kwargs["ti"].xcom_pull(key="build_name")
|
||||
params = MapsGenerationDAG.get_params(**kwargs)
|
||||
params.update({"build_name": build_name, "countries": [country,]})
|
||||
env = Env(**params)
|
||||
# We need to check that mwm.tmp exists. This matters when we
# build mwms from only a part of the planet.
|
||||
tmp_mwm_name = env.get_tmp_mwm_names()
|
||||
assert len(tmp_mwm_name) <= 1
|
||||
if not tmp_mwm_name:
|
||||
logger.warning(f"mwm.tmp does not exist for {country}.")
|
||||
return
|
||||
|
||||
run_generation_from_first_stage(env, (sd.StageMwm(),), build_lock=False)
|
||||
|
||||
return build_mwm
|
||||
|
||||
@staticmethod
|
||||
def build_epilog(**kwargs):
|
||||
build_name = kwargs["ti"].xcom_pull(key="build_name")
|
||||
params = MapsGenerationDAG.get_params(**kwargs)
|
||||
params.update({"build_name": build_name})
|
||||
env = Env(**params)
|
||||
run_generation_from_first_stage(
|
||||
env,
|
||||
(
|
||||
sd.StageCountriesTxt(),
|
||||
sd.StageLocalAds(),
|
||||
sd.StageStatistics(),
|
||||
sd.StageCleanup(),
|
||||
),
|
||||
)
|
||||
env.finish()
|
||||
|
||||
@staticmethod
|
||||
def publish_maps(**kwargs):
|
||||
build_name = kwargs["ti"].xcom_pull(key="build_name")
|
||||
params = MapsGenerationDAG.get_params(**kwargs)
|
||||
params.update({"build_name": build_name})
|
||||
env = Env(**params)
|
||||
subdir = MapsGenerationDAG.get_params(namespace="storage", **kwargs)["subdir"]
|
||||
storage_path = f"{MAPS_STORAGE_PATH}/{subdir}"
|
||||
storage.wd_publish(env.paths.mwm_path, f"{storage_path}/{env.mwm_version}/")
|
||||
|
||||
def make_mwm_operator(self, country):
|
||||
normalized_name = "__".join(country.lower().split())
|
||||
return PythonOperator(
|
||||
task_id=f"Build_country_{normalized_name}_task",
|
||||
provide_context=True,
|
||||
python_callable=MapsGenerationDAG.make_build_mwm_func(country),
|
||||
dag=self,
|
||||
)
|
||||
|
||||
|
||||
PARAMS = {"storage": {"subdir": "open_source"}}
|
||||
if settings.DEBUG:
|
||||
PARAMS["env"] = {
|
||||
# The planet file in debug mode does not contain the Russia_Moscow territory.
# It is listed here for testing purposes.
|
||||
"countries": ["Cuba", "Haiti", "Jamaica", "Cayman Islands", "Russia_Moscow"]
|
||||
}
|
||||
|
||||
OPEN_SOURCE_MAPS_GENERATION_DAG = MapsGenerationDAG(
|
||||
"Generate_open_source_maps",
|
||||
schedule_interval=timedelta(days=7),
|
||||
default_args={
|
||||
"owner": "OMaps",
|
||||
"depends_on_past": True,
|
||||
"start_date": days_ago(0),
|
||||
"email": settings.EMAILS,
|
||||
"email_on_failure": True,
|
||||
"email_on_retry": False,
|
||||
"retries": 0,
|
||||
"retry_delay": timedelta(minutes=5),
|
||||
"priority_weight": 1,
|
||||
"params": PARAMS,
|
||||
},
|
||||
)
|
||||
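get_params() reads a namespaced dictionary out of the operator params, and PARAMS above supplies the "storage" (and, in debug, "env") namespaces. A standalone sketch of that lookup; the kwargs shape imitates what the PythonOperator passes to the callables:

```python
# Sketch: how the namespaced params are sliced by get_params().
def get_params(namespace="env", **kwargs):
    return kwargs.get("params", {}).get(namespace, {})


kwargs = {
    "params": {
        "storage": {"subdir": "open_source"},
        "env": {"countries": ["Cuba", "Haiti"]},
    }
}

print(get_params(**kwargs))                       # {'countries': ['Cuba', 'Haiti']}
print(get_params(namespace="storage", **kwargs))  # {'subdir': 'open_source'}
```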
83 tools/python/airmaps/dags/update_planet.py Normal file
@@ -0,0 +1,83 @@
import logging
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
|
||||
from airmaps.instruments import settings
|
||||
from airmaps.instruments import storage
|
||||
from airmaps.instruments.utils import make_rm_build_task
|
||||
from maps_generator.generator import stages_declaration as sd
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.maps_generator import run_generation
|
||||
from maps_generator.utils.md5 import md5_ext
|
||||
|
||||
logger = logging.getLogger("airmaps")
|
||||
|
||||
|
||||
DAG = DAG(
|
||||
"Update_planet",
|
||||
schedule_interval=timedelta(days=1),
|
||||
default_args={
|
||||
"owner": "OMaps",
|
||||
"depends_on_past": True,
|
||||
"start_date": days_ago(0),
|
||||
"email": settings.EMAILS,
|
||||
"email_on_failure": True,
|
||||
"email_on_retry": False,
|
||||
"retries": 0,
|
||||
"retry_delay": timedelta(minutes=5),
|
||||
"priority_weight": 1,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
PLANET_STORAGE_PATH = f"{settings.STORAGE_PREFIX}/planet_regular/planet-latest.o5m"
|
||||
|
||||
|
||||
def update_planet(**kwargs):
|
||||
env = Env()
|
||||
kwargs["ti"].xcom_push(key="build_name", value=env.build_name)
|
||||
|
||||
if settings.DEBUG:
|
||||
env.add_skipped_stage(sd.StageUpdatePlanet)
|
||||
|
||||
run_generation(
|
||||
env,
|
||||
(
|
||||
sd.StageDownloadAndConvertPlanet(),
|
||||
sd.StageUpdatePlanet(),
|
||||
sd.StageCleanup(),
|
||||
),
|
||||
)
|
||||
env.finish()
|
||||
|
||||
|
||||
def publish_planet(**kwargs):
|
||||
build_name = kwargs["ti"].xcom_pull(key="build_name")
|
||||
env = Env(build_name=build_name)
|
||||
storage.wd_publish(env.paths.planet_o5m, PLANET_STORAGE_PATH)
|
||||
storage.wd_publish(md5_ext(env.paths.planet_o5m), md5_ext(PLANET_STORAGE_PATH))
|
||||
|
||||
|
||||
UPDATE_PLANET_TASK = PythonOperator(
|
||||
task_id="Update_planet_task",
|
||||
provide_context=True,
|
||||
python_callable=update_planet,
|
||||
dag=DAG,
|
||||
)
|
||||
|
||||
|
||||
PUBLISH_PLANET_TASK = PythonOperator(
|
||||
task_id="Publish_planet_task",
|
||||
provide_context=True,
|
||||
python_callable=publish_planet,
|
||||
dag=DAG,
|
||||
)
|
||||
|
||||
|
||||
RM_BUILD_TASK = make_rm_build_task(DAG)
|
||||
|
||||
|
||||
UPDATE_PLANET_TASK >> PUBLISH_PLANET_TASK >> RM_BUILD_TASK
|
||||
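publish_planet() uploads the planet file together with its md5 sidecar. A sketch of how a consumer might verify a downloaded planet against that checksum; it assumes the sidecar produced by md5_ext() is "<file>.md5" in the usual "<hash>  <name>" format, which is an assumption rather than something guaranteed by the code above:

```python
# Sketch: verify a downloaded planet file against its md5 sidecar.
# The file name and the ".md5" sidecar format are assumptions.
import hashlib

PLANET = "planet-latest.o5m"

md5 = hashlib.md5()
with open(PLANET, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        md5.update(chunk)

with open(f"{PLANET}.md5") as f:
    expected = f.read().split()[0]

print("ok" if md5.hexdigest() == expected else "checksum mismatch")
```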
62 tools/python/airmaps/instruments/settings.py Normal file
@@ -0,0 +1,62 @@
import sys
|
||||
from typing import AnyStr
|
||||
|
||||
from maps_generator.generator import settings
|
||||
|
||||
STORAGE_PREFIX = ""
|
||||
|
||||
# Storage settings
|
||||
WD_HOST = ""
|
||||
WD_LOGIN = ""
|
||||
WD_PASSWORD = ""
|
||||
|
||||
# Common section
|
||||
EMAILS = []
|
||||
|
||||
settings.LOGGING["loggers"]["airmaps"] = {
|
||||
"handlers": ["stdout", "file"],
|
||||
"level": "DEBUG",
|
||||
"propagate": True,
|
||||
}
|
||||
|
||||
|
||||
def get_airmaps_emails(emails: AnyStr):
|
||||
if not emails:
|
||||
return []
|
||||
|
||||
for ch in [",", ";", ":"]:
    emails = emails.replace(ch, " ")

return list(filter(None, [e.strip() for e in emails.split(" ")]))
|
||||
|
||||
|
||||
def init(default_settings_path: AnyStr):
|
||||
settings.init(default_settings_path)
|
||||
|
||||
# Try to read the config and override the default settings
|
||||
cfg = settings.CfgReader(default_settings_path)
|
||||
|
||||
# Storage section
|
||||
global WD_HOST
|
||||
global WD_LOGIN
|
||||
global WD_PASSWORD
|
||||
|
||||
WD_HOST = cfg.get_opt("Storage", "WD_HOST", WD_HOST)
|
||||
WD_LOGIN = cfg.get_opt("Storage", "WD_LOGIN", WD_LOGIN)
|
||||
WD_PASSWORD = cfg.get_opt("Storage", "WD_PASSWORD", WD_PASSWORD)
|
||||
|
||||
# Common section
|
||||
global EMAILS
|
||||
emails = cfg.get_opt("Common", "EMAILS", "")
|
||||
EMAILS = get_airmaps_emails(emails)
|
||||
|
||||
# Import all constants from maps_generator.generator.settings.
|
||||
thismodule = sys.modules[__name__]
|
||||
for name in dir(settings):
|
||||
if not name.startswith("_") and name.isupper():
|
||||
value = getattr(settings, name)
|
||||
setattr(thismodule, name, value)
|
||||
|
||||
global STORAGE_PREFIX
|
||||
if settings.DEBUG:
|
||||
STORAGE_PREFIX = "/tests"
|
||||
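A standalone sketch of the EMAILS parsing performed by get_airmaps_emails(), so the expected input and output are explicit; the addresses are placeholders:

```python
# Sketch: turn a comma/semicolon/colon separated string into a clean list.
def get_airmaps_emails(emails):
    if not emails:
        return []

    for ch in [",", ";", ":"]:
        emails = emails.replace(ch, " ")

    return list(filter(None, [e.strip() for e in emails.split(" ")]))


print(get_airmaps_emails("first@example.com, second@example.com;third@example.com"))
# ['first@example.com', 'second@example.com', 'third@example.com']
```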
27 tools/python/airmaps/instruments/storage.py Normal file
@@ -0,0 +1,27 @@
import logging
|
||||
|
||||
import webdav.client as wc
|
||||
|
||||
from airmaps.instruments import settings
|
||||
|
||||
logger = logging.getLogger("airmaps")
|
||||
|
||||
WD_OPTIONS = {
|
||||
"webdav_hostname": settings.WD_HOST,
|
||||
"webdav_login": settings.WD_LOGIN,
|
||||
"webdav_password": settings.WD_PASSWORD,
|
||||
}
|
||||
|
||||
|
||||
def wd_fetch(src, dst):
|
||||
logger.info(f"Fetch from {src} to {dst} with options {WD_OPTIONS}.")
|
||||
client = wc.Client(WD_OPTIONS)
|
||||
client.download_sync(src, dst)
|
||||
|
||||
|
||||
def wd_publish(src, dst):
|
||||
logger.info(f"Publish from {src} to {dst} with options {WD_OPTIONS}.")
|
||||
client = wc.Client(WD_OPTIONS)
|
||||
tmp = f"{dst[:-1]}__/" if dst[-1] == "/" else f"{dst}__"
|
||||
client.upload_sync(local_path=src, remote_path=tmp)
|
||||
client.move(remote_path_from=tmp, remote_path_to=dst)
|
||||
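wd_publish() uploads to a temporary "__" path first and then moves the file into place, so readers of the storage never observe a partially uploaded artifact. A hedged sketch of the same two-step publish; host, credentials and paths are placeholders:

```python
# Sketch only: publish a local file the way wd_publish() does it.
import webdav.client as wc

options = {
    "webdav_hostname": "https://storage.example.com",  # placeholder
    "webdav_login": "user",                             # placeholder
    "webdav_password": "password",                      # placeholder
}
client = wc.Client(options)

src = "planet-latest.o5m"                        # placeholder
dst = "/tests/planet_regular/planet-latest.o5m"  # placeholder
tmp = f"{dst}__"

client.upload_sync(local_path=src, remote_path=tmp)    # slow upload to a temp path
client.move(remote_path_from=tmp, remote_path_to=dst)  # quick rename into place
```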
48 tools/python/airmaps/instruments/utils.py Normal file
@@ -0,0 +1,48 @@
import os
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from typing import Iterable
|
||||
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.generator.stages import Stage
|
||||
from maps_generator.generator.stages import get_stage_name
|
||||
from maps_generator.maps_generator import run_generation
|
||||
|
||||
|
||||
def put_current_date_in_filename(filename):
|
||||
path, name = os.path.split(filename)
|
||||
parts = name.split(".", maxsplit=1)
|
||||
parts[0] += f"__{datetime.today().strftime('%Y_%m_%d')}"
|
||||
return os.path.join(path, ".".join(parts))
|
||||
|
||||
|
||||
def get_latest_filename(filename, prefix=""):
|
||||
path, name = os.path.split(filename)
|
||||
parts = name.split(".", maxsplit=1)
|
||||
assert len(parts) != 0, parts
|
||||
parts[0] = f"{prefix}latest"
|
||||
return os.path.join(path, ".".join(parts))
|
||||
|
||||
|
||||
def rm_build(**kwargs):
|
||||
build_name = kwargs["ti"].xcom_pull(key="build_name")
|
||||
env = Env(build_name=build_name)
|
||||
shutil.rmtree(env.build_path)
|
||||
|
||||
|
||||
def make_rm_build_task(dag):
|
||||
return PythonOperator(
|
||||
task_id="Rm_build_task",
|
||||
provide_context=True,
|
||||
python_callable=rm_build,
|
||||
dag=dag,
|
||||
)
|
||||
|
||||
|
||||
def run_generation_from_first_stage(
|
||||
env: Env, stages: Iterable[Stage], build_lock: bool = True
|
||||
):
|
||||
from_stage = get_stage_name(next(iter(stages)))
|
||||
run_generation(env, stages, from_stage, build_lock)
|
||||
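A usage sketch for the two filename helpers above; it assumes the airmaps package is importable and configured (importing it runs settings.init()), the paths are placeholders, and the date part depends on the day the snippet is run:

```python
from airmaps.instruments.utils import get_latest_filename
from airmaps.instruments.utils import put_current_date_in_filename

print(put_current_date_in_filename("/data/coasts/WorldCoasts.geom"))
# e.g. /data/coasts/WorldCoasts__2020_05_04.geom

print(get_latest_filename("/data/coasts/WorldCoasts.geom"))
# /data/coasts/latest.geom

print(get_latest_filename("/data/coasts/WorldCoasts.geom", prefix="coasts_"))
# /data/coasts/coasts_latest.geom
```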
6 tools/python/airmaps/requirements.txt Normal file
@@ -0,0 +1,6 @@
omim-data-all
|
||||
omim-maps_generator
|
||||
apache-airflow [postgres]==1.10.10
|
||||
psycopg2-binary==2.8.4
|
||||
cryptography>=41.0.0
|
||||
webdavclient==1.0.8
|
||||
5 tools/python/airmaps/requirements_dev.txt Normal file
@@ -0,0 +1,5 @@
-r ../maps_generator/requirements_dev.txt
|
||||
apache-airflow [postgres]==1.10.10
|
||||
psycopg2-binary==2.8.4
|
||||
cryptography>=41.0.0
|
||||
webdavclient==1.0.8
|
||||
4 tools/python/airmaps/sandbox/.dockerignore Normal file
@@ -0,0 +1,4 @@
./.git*
|
||||
./android
|
||||
./iphone
|
||||
./xcode
|
||||
92 tools/python/airmaps/sandbox/README.md Normal file
@@ -0,0 +1,92 @@
# Sandbox
|
||||
|
||||
This project shows how airmaps works on your computer.
|
||||
|
||||
## Setup
|
||||
|
||||
You must have [docker](https://docs.docker.com/get-docker/) and [docker-compose](https://docs.docker.com/compose/install/) installed.
|
||||
|
||||
0. Change working directory:
|
||||
|
||||
```sh
|
||||
$ cd omim/tools/python/airmaps/sandbox
|
||||
```
|
||||
|
||||
1. Build airmaps service:
|
||||
|
||||
```sh
|
||||
sandbox$ ./build.sh
|
||||
```
|
||||
|
||||
2. Create the storage (the `sandbox/storage` directory):
|
||||
|
||||
```sh
|
||||
sandbox$ ./create_storage.sh
|
||||
```
|
||||
|
||||
Note: You may need `sudo`, because `./create_storage.sh` tries to change the owner of the `sandbox/storage/tests` directory.
|
||||
|
||||
## Usage
|
||||
|
||||
### Starting
|
||||
|
||||
0. Change working directory:
|
||||
|
||||
```sh
|
||||
$ cd omim/tools/python/airmaps/sandbox
|
||||
```
|
||||
|
||||
1. Run all services:
|
||||
|
||||
```sh
|
||||
sandbox$ docker-compose up
|
||||
```
|
||||
|
||||
2. Open http://localhost in your browser.
|
||||
|
||||
Note: You can see the results produced by airmaps in `sandbox/storage/tests`.
|
||||
|
||||
### Stopping
|
||||
|
||||
0. Change working directory:
|
||||
|
||||
```sh
|
||||
$ cd omim/tools/python/airmaps/sandbox
|
||||
```
|
||||
|
||||
1. Stop all services:
|
||||
Press `Ctrl+C` and run:
|
||||
|
||||
```sh
|
||||
sandbox$ docker-compose down
|
||||
```
|
||||
|
||||
### Clean
|
||||
|
||||
#### Clean storage and intermediate files:
|
||||
|
||||
0. Change working directory:
|
||||
|
||||
```sh
|
||||
$ cd omim/tools/python/airmaps/sandbox
|
||||
```
|
||||
|
||||
1. Clean storage and intermediate files:
|
||||
|
||||
```sh
|
||||
sandbox$ ./clean.sh
|
||||
```
|
||||
|
||||
#### Remove containers:
|
||||
|
||||
0. Change working directory:
|
||||
|
||||
```sh
|
||||
$ cd omim/tools/python/airmaps/sandbox
|
||||
```
|
||||
|
||||
1. Remove containers:
|
||||
|
||||
```sh
|
||||
sandbox$ docker-compose rm
|
||||
```
|
||||
28 tools/python/airmaps/sandbox/airmaps/Dockerfile Normal file
@@ -0,0 +1,28 @@
FROM python:3.6
|
||||
|
||||
ARG TZ=Etc/UTC
|
||||
|
||||
WORKDIR /omim/
|
||||
|
||||
ADD . .
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
cmake \
|
||||
libgl1-mesa-dev \
|
||||
libsqlite3-dev \
|
||||
qt5-default \
|
||||
zlib1g-dev \
|
||||
tzdata \
|
||||
locales-all
|
||||
|
||||
RUN ln -fs /usr/share/zoneinfo/$TZ /etc/localtime && \
|
||||
dpkg-reconfigure --frontend noninteractive tzdata
|
||||
|
||||
RUN ./tools/unix/build_omim.sh -rs generator_tool
|
||||
|
||||
RUN pip install --upgrade pip
|
||||
|
||||
RUN pip install werkzeug==0.16.0 \
|
||||
SQLAlchemy==1.3.15 \
|
||||
-r ./tools/python/airmaps/requirements_dev.txt
|
||||
856 tools/python/airmaps/sandbox/airmaps/airflow.cfg Normal file
@@ -0,0 +1,856 @@
[core]
|
||||
# The folder where your airflow pipelines live, most likely a
|
||||
# subfolder in a code repository
|
||||
# This path must be absolute
|
||||
dags_folder = /omim/tools/python/airmaps/dags
|
||||
|
||||
# The folder where airflow should store its log files
|
||||
# This path must be absolute
|
||||
base_log_folder = /airflow_home/logs
|
||||
|
||||
# Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search.
|
||||
# Users must supply an Airflow connection id that provides access to the storage
|
||||
# location. If remote_logging is set to true, see UPDATING.md for additional
|
||||
# configuration requirements.
|
||||
remote_logging = False
|
||||
remote_log_conn_id =
|
||||
remote_base_log_folder =
|
||||
encrypt_s3_logs = False
|
||||
|
||||
# Logging level
|
||||
logging_level = INFO
|
||||
fab_logging_level = WARN
|
||||
|
||||
# Logging class
|
||||
# Specify the class that will specify the logging configuration
|
||||
# This class has to be on the python classpath
|
||||
# logging_config_class = my.path.default_local_settings.LOGGING_CONFIG
|
||||
logging_config_class =
|
||||
|
||||
# Log format
|
||||
# Colour the logs when the controlling terminal is a TTY.
|
||||
colored_console_log = True
|
||||
colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
|
||||
colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
|
||||
|
||||
log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
|
||||
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
|
||||
|
||||
# Log filename format
|
||||
log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log
|
||||
log_processor_filename_template = {{ filename }}.log
|
||||
dag_processor_manager_log_location = /airflow_home/logs/dag_processor_manager/dag_processor_manager.log
|
||||
|
||||
# Hostname by providing a path to a callable, which will resolve the hostname
|
||||
# The format is "package:function". For example,
|
||||
# default value "socket:getfqdn" means that result from getfqdn() of "socket" package will be used as hostname
|
||||
# No argument should be required in the function specified.
|
||||
# If using IP address as hostname is preferred, use value "airflow.utils.net:get_host_ip_address"
|
||||
hostname_callable = socket:getfqdn
|
||||
|
||||
# Default timezone in case supplied date times are naive
|
||||
# can be utc (default), system, or any IANA timezone string (e.g. Europe/Amsterdam)
|
||||
default_timezone = system
|
||||
|
||||
# The executor class that airflow should use. Choices include
|
||||
# SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor, KubernetesExecutor
|
||||
executor = LocalExecutor
|
||||
|
||||
# The SqlAlchemy connection string to the metadata database.
|
||||
# SqlAlchemy supports many different database engine, more information
|
||||
# their website
|
||||
sql_alchemy_conn = postgresql+psycopg2://postgres:postgres@db:5432/airflow
|
||||
|
||||
# The encoding for the databases
|
||||
sql_engine_encoding = utf-8
|
||||
|
||||
# If SqlAlchemy should pool database connections.
|
||||
sql_alchemy_pool_enabled = True
|
||||
|
||||
# The SqlAlchemy pool size is the maximum number of database connections
|
||||
# in the pool. 0 indicates no limit.
|
||||
sql_alchemy_pool_size = 5
|
||||
|
||||
# The maximum overflow size of the pool.
|
||||
# When the number of checked-out connections reaches the size set in pool_size,
|
||||
# additional connections will be returned up to this limit.
|
||||
# When those additional connections are returned to the pool, they are disconnected and discarded.
|
||||
# It follows then that the total number of simultaneous connections the pool will allow is pool_size + max_overflow,
|
||||
# and the total number of "sleeping" connections the pool will allow is pool_size.
|
||||
# max_overflow can be set to -1 to indicate no overflow limit;
|
||||
# no limit will be placed on the total number of concurrent connections. Defaults to 10.
|
||||
sql_alchemy_max_overflow = 10
|
||||
|
||||
# The SqlAlchemy pool recycle is the number of seconds a connection
|
||||
# can be idle in the pool before it is invalidated. This config does
|
||||
# not apply to sqlite. If the number of DB connections is ever exceeded,
|
||||
# a lower config value will allow the system to recover faster.
|
||||
sql_alchemy_pool_recycle = 1800
|
||||
|
||||
# Check connection at the start of each connection pool checkout.
|
||||
# Typically, this is a simple statement like “SELECT 1”.
|
||||
# More information here: https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic
|
||||
sql_alchemy_pool_pre_ping = True
|
||||
|
||||
# The schema to use for the metadata database
|
||||
# SqlAlchemy supports databases with the concept of multiple schemas.
|
||||
sql_alchemy_schema =
|
||||
|
||||
# The amount of parallelism as a setting to the executor. This defines
|
||||
# the max number of task instances that should run simultaneously
|
||||
# on this airflow installation
|
||||
parallelism = 32
|
||||
|
||||
# The number of task instances allowed to run concurrently by the scheduler
|
||||
dag_concurrency = 16
|
||||
|
||||
# Are DAGs paused by default at creation
|
||||
dags_are_paused_at_creation = True
|
||||
|
||||
# The maximum number of active DAG runs per DAG
|
||||
max_active_runs_per_dag = 16
|
||||
|
||||
# Whether to load the examples that ship with Airflow. It's good to
|
||||
# get started, but you probably want to set this to False in a production
|
||||
# environment
|
||||
load_examples = False
|
||||
|
||||
# Where your Airflow plugins are stored
|
||||
plugins_folder = /airflow_home/plugins
|
||||
|
||||
# Secret key to save connection passwords in the db
|
||||
fernet_key = uoTKzPCjhVBsERkDylXY5g1hYeg7OAYjk_a_ek2YMwQ=
|
||||
|
||||
# Whether to disable pickling dags
|
||||
donot_pickle = False
|
||||
|
||||
# How long before timing out a python file import
|
||||
dagbag_import_timeout = 30
|
||||
|
||||
# How long before timing out a DagFileProcessor, which processes a dag file
|
||||
dag_file_processor_timeout = 50
|
||||
|
||||
# The class to use for running task instances in a subprocess
|
||||
task_runner = StandardTaskRunner
|
||||
|
||||
# If set, tasks without a `run_as_user` argument will be run with this user
|
||||
# Can be used to de-elevate a sudo user running Airflow when executing tasks
|
||||
default_impersonation =
|
||||
|
||||
# What security module to use (for example kerberos):
|
||||
security =
|
||||
|
||||
# If set to False enables some unsecure features like Charts and Ad Hoc Queries.
|
||||
# In 2.0 will default to True.
|
||||
secure_mode = False
|
||||
|
||||
# Turn unit test mode on (overwrites many configuration options with test
|
||||
# values at runtime)
|
||||
unit_test_mode = False
|
||||
|
||||
# Name of handler to read task instance logs.
|
||||
# Default to use task handler.
|
||||
task_log_reader = task
|
||||
|
||||
# Whether to enable pickling for xcom (note that this is insecure and allows for
|
||||
# RCE exploits). This will be deprecated in Airflow 2.0 (be forced to False).
|
||||
enable_xcom_pickling = True
|
||||
|
||||
# When a task is killed forcefully, this is the amount of time in seconds that
|
||||
# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED
|
||||
killed_task_cleanup_time = 60
|
||||
|
||||
# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow backfill -c` or
|
||||
# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params.
|
||||
dag_run_conf_overrides_params = False
|
||||
|
||||
# Worker initialisation check to validate Metadata Database connection
|
||||
worker_precheck = False
|
||||
|
||||
# When discovering DAGs, ignore any files that don't contain the strings `DAG` and `airflow`.
|
||||
dag_discovery_safe_mode = True
|
||||
|
||||
# The number of retries each task is going to have by default. Can be overridden at dag or task level.
|
||||
default_task_retries = 0
|
||||
|
||||
|
||||
[cli]
|
||||
# In what way should the cli access the API. The LocalClient will use the
|
||||
# database directly, while the json_client will use the api running on the
|
||||
# webserver
|
||||
api_client = airflow.api.client.local_client
|
||||
|
||||
# If you set web_server_url_prefix, do NOT forget to append it here, ex:
|
||||
# endpoint_url = http://localhost:8080/myroot
|
||||
# So api will look like: http://localhost:8080/myroot/api/experimental/...
|
||||
endpoint_url = http://localhost:8080
|
||||
|
||||
[api]
|
||||
# How to authenticate users of the API
|
||||
auth_backend = airflow.api.auth.backend.default
|
||||
|
||||
[lineage]
|
||||
# what lineage backend to use
|
||||
backend =
|
||||
|
||||
[atlas]
|
||||
sasl_enabled = False
|
||||
host =
|
||||
port = 21000
|
||||
username =
|
||||
password =
|
||||
|
||||
[operators]
|
||||
# The default owner assigned to each new operator, unless
|
||||
# provided explicitly or passed via `default_args`
|
||||
default_owner = airflow
|
||||
default_cpus = 1
|
||||
default_ram = 512
|
||||
default_disk = 512
|
||||
default_gpus = 0
|
||||
|
||||
[hive]
|
||||
# Default mapreduce queue for HiveOperator tasks
|
||||
default_hive_mapred_queue =
|
||||
|
||||
[webserver]
|
||||
# The base url of your website as airflow cannot guess what domain or
|
||||
# cname you are using. This is used in automated emails that
|
||||
# airflow sends to point links to the right web server
|
||||
base_url = http://localhost:8080
|
||||
|
||||
# Default timezone to display all dates in the UI, can be UTC, system, or
|
||||
# any IANA timezone string (e.g. Europe/Amsterdam). If left empty the
|
||||
# default value of core/default_timezone will be used
|
||||
# Example: default_ui_timezone = America/New_York
|
||||
default_ui_timezone = system
|
||||
|
||||
# The ip specified when starting the web server
|
||||
web_server_host = 0.0.0.0
|
||||
|
||||
# The port on which to run the web server
|
||||
web_server_port = 8080
|
||||
|
||||
# Paths to the SSL certificate and key for the web server. When both are
|
||||
# provided SSL will be enabled. This does not change the web server port.
|
||||
web_server_ssl_cert =
|
||||
web_server_ssl_key =
|
||||
|
||||
# Number of seconds the webserver waits before killing gunicorn master that doesn't respond
|
||||
web_server_master_timeout = 120
|
||||
|
||||
# Number of seconds the gunicorn webserver waits before timing out on a worker
|
||||
web_server_worker_timeout = 120
|
||||
|
||||
# Number of workers to refresh at a time. When set to 0, worker refresh is
|
||||
# disabled. When nonzero, airflow periodically refreshes webserver workers by
|
||||
# bringing up new ones and killing old ones.
|
||||
worker_refresh_batch_size = 1
|
||||
|
||||
# Number of seconds to wait before refreshing a batch of workers.
|
||||
worker_refresh_interval = 30
|
||||
|
||||
# Secret key used to run your flask app
|
||||
secret_key = temporary_key
|
||||
|
||||
# Number of workers to run the Gunicorn web server
|
||||
workers = 4
|
||||
|
||||
# The worker class gunicorn should use. Choices include
|
||||
# sync (default), eventlet, gevent
|
||||
worker_class = sync
|
||||
|
||||
# Log files for the gunicorn webserver. '-' means log to stderr.
|
||||
access_logfile = -
|
||||
error_logfile = -
|
||||
|
||||
# Expose the configuration file in the web server
|
||||
# This is only applicable for the flask-admin based web UI (non FAB-based).
|
||||
# In the FAB-based web UI with RBAC feature,
|
||||
# access to configuration is controlled by role permissions.
|
||||
expose_config = False
|
||||
|
||||
# Set to true to turn on authentication:
|
||||
# https://airflow.apache.org/security.html#web-authentication
|
||||
authenticate = False
|
||||
|
||||
# Filter the list of dags by owner name (requires authentication to be enabled)
|
||||
filter_by_owner = False
|
||||
|
||||
# Filtering mode. Choices include user (default) and ldapgroup.
|
||||
# Ldap group filtering requires using the ldap backend
|
||||
#
|
||||
# Note that the ldap server needs the "memberOf" overlay to be set up
|
||||
# in order to user the ldapgroup mode.
|
||||
owner_mode = user
|
||||
|
||||
# Default DAG view. Valid values are:
|
||||
# tree, graph, duration, gantt, landing_times
|
||||
dag_default_view = tree
|
||||
|
||||
# Default DAG orientation. Valid values are:
|
||||
# LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top)
|
||||
dag_orientation = LR
|
||||
|
||||
# Puts the webserver in demonstration mode; blurs the names of Operators for
|
||||
# privacy.
|
||||
demo_mode = False
|
||||
|
||||
# The amount of time (in secs) webserver will wait for initial handshake
|
||||
# while fetching logs from other worker machine
|
||||
log_fetch_timeout_sec = 5
|
||||
|
||||
# By default, the webserver shows paused DAGs. Flip this to hide paused
|
||||
# DAGs by default
|
||||
hide_paused_dags_by_default = False
|
||||
|
||||
# Consistent page size across all listing views in the UI
|
||||
page_size = 100
|
||||
|
||||
# Use FAB-based webserver with RBAC feature
|
||||
rbac = False
|
||||
|
||||
# Define the color of navigation bar
|
||||
navbar_color = #007A87
|
||||
|
||||
# Default dagrun to show in UI
|
||||
default_dag_run_display_number = 25
|
||||
|
||||
# Enable werkzeug `ProxyFix` middleware
|
||||
enable_proxy_fix = False
|
||||
|
||||
# Set secure flag on session cookie
|
||||
cookie_secure = False
|
||||
|
||||
# Set samesite policy on session cookie
|
||||
cookie_samesite =
|
||||
|
||||
# Default setting for wrap toggle on DAG code and TI log views.
|
||||
default_wrap = False
|
||||
|
||||
# Send anonymous user activity to your analytics tool
|
||||
# analytics_tool = # choose from google_analytics, segment, or metarouter
|
||||
# analytics_id = XXXXXXXXXXX
|
||||
|
||||
[email]
|
||||
email_backend = airflow.utils.email.send_email_smtp
|
||||
|
||||
|
||||
[smtp]
|
||||
# If you want airflow to send emails on retries, failure, and you want to use
|
||||
# the airflow.utils.email.send_email_smtp function, you have to configure an
|
||||
# smtp server here
|
||||
smtp_host = localhost
|
||||
smtp_starttls = True
|
||||
smtp_ssl = False
|
||||
# Uncomment and set the user/pass settings if you want to use SMTP AUTH
|
||||
# smtp_user = airflow
|
||||
# smtp_password = airflow
|
||||
smtp_port = 25
|
||||
smtp_mail_from = airflow@example.com
|
||||
|
||||
[sentry]
|
||||
# Sentry (https://docs.sentry.io) integration
|
||||
sentry_dsn =
|
||||
|
||||
|
||||
[celery]
|
||||
# This section only applies if you are using the CeleryExecutor in
|
||||
# [core] section above
|
||||
|
||||
# The app name that will be used by celery
|
||||
celery_app_name = airflow.executors.celery_executor
|
||||
|
||||
# The concurrency that will be used when starting workers with the
|
||||
# "airflow worker" command. This defines the number of task instances that
|
||||
# a worker will take, so size up your workers based on the resources on
|
||||
# your worker box and the nature of your tasks
|
||||
worker_concurrency = 40
|
||||
|
||||
# The maximum and minimum concurrency that will be used when starting workers with the
|
||||
# "airflow worker" command (always keep minimum processes, but grow to maximum if necessary).
|
||||
# Note the value should be "max_concurrency,min_concurrency"
|
||||
# Pick these numbers based on resources on worker box and the nature of the task.
|
||||
# If autoscale option is available, worker_concurrency will be ignored.
|
||||
# http://docs.celeryproject.org/en/latest/reference/celery.bin.worker.html#cmdoption-celery-worker-autoscale
|
||||
# worker_autoscale = 16,12
|
||||
|
||||
# When you start an airflow worker, airflow starts a tiny web server
|
||||
# subprocess to serve the workers local log files to the airflow main
|
||||
# web server, who then builds pages and sends them to users. This defines
|
||||
# the port on which the logs are served. It needs to be unused, and open
|
||||
# visible from the main web server to connect into the workers.
|
||||
worker_log_server_port = 8793
|
||||
|
||||
# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
|
||||
# a sqlalchemy database. Refer to the Celery documentation for more
|
||||
# information.
|
||||
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings
|
||||
broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow
|
||||
|
||||
# The Celery result_backend. When a job finishes, it needs to update the
|
||||
# metadata of the job. Therefore it will post a message on a message bus,
|
||||
# or insert it into a database (depending of the backend)
|
||||
# This status is used by the scheduler to update the state of the task
|
||||
# The use of a database is highly recommended
|
||||
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings
|
||||
result_backend = db+mysql://airflow:airflow@localhost:3306/airflow
|
||||
|
||||
# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
|
||||
# it `airflow flower`. This defines the IP that Celery Flower runs on
|
||||
flower_host = 0.0.0.0
|
||||
|
||||
# The root URL for Flower
|
||||
# Ex: flower_url_prefix = /flower
|
||||
flower_url_prefix =
|
||||
|
||||
# This defines the port that Celery Flower runs on
|
||||
flower_port = 5555
|
||||
|
||||
# Securing Flower with Basic Authentication
|
||||
# Accepts user:password pairs separated by a comma
|
||||
# Example: flower_basic_auth = user1:password1,user2:password2
|
||||
flower_basic_auth =
|
||||
|
||||
# Default queue that tasks get assigned to and that worker listen on.
|
||||
default_queue = default
|
||||
|
||||
# How many processes CeleryExecutor uses to sync task state.
|
||||
# 0 means to use max(1, number of cores - 1) processes.
|
||||
sync_parallelism = 0
|
||||
|
||||
# Import path for celery configuration options
|
||||
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
|
||||
|
||||
# In case of using SSL
|
||||
ssl_active = False
|
||||
ssl_key =
|
||||
ssl_cert =
|
||||
ssl_cacert =
|
||||
|
||||
# Celery Pool implementation.
|
||||
# Choices include: prefork (default), eventlet, gevent or solo.
|
||||
# See:
|
||||
# https://docs.celeryproject.org/en/latest/userguide/workers.html#concurrency
|
||||
# https://docs.celeryproject.org/en/latest/userguide/concurrency/eventlet.html
|
||||
pool = prefork
|
||||
|
||||
[celery_broker_transport_options]
|
||||
# This section is for specifying options which can be passed to the
|
||||
# underlying celery broker transport. See:
|
||||
# http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_transport_options
|
||||
|
||||
# The visibility timeout defines the number of seconds to wait for the worker
|
||||
# to acknowledge the task before the message is redelivered to another worker.
|
||||
# Make sure to increase the visibility timeout to match the time of the longest
|
||||
# ETA you're planning to use.
|
||||
#
|
||||
# visibility_timeout is only supported for Redis and SQS celery brokers.
|
||||
# See:
|
||||
# http://docs.celeryproject.org/en/master/userguide/configuration.html#std:setting-broker_transport_options
|
||||
#
|
||||
#visibility_timeout = 21600
|
||||
|
||||
[dask]
|
||||
# This section only applies if you are using the DaskExecutor in
|
||||
# [core] section above
|
||||
|
||||
# The IP address and port of the Dask cluster's scheduler.
|
||||
cluster_address = 127.0.0.1:8786
|
||||
# TLS/ SSL settings to access a secured Dask scheduler.
|
||||
tls_ca =
|
||||
tls_cert =
|
||||
tls_key =
|
||||
|
||||
|
||||
[scheduler]
|
||||
# Task instances listen for external kill signal (when you clear tasks
|
||||
# from the CLI or the UI), this defines the frequency at which they should
|
||||
# listen (in seconds).
|
||||
job_heartbeat_sec = 5
|
||||
|
||||
# The scheduler constantly tries to trigger new tasks (look at the
|
||||
# scheduler section in the docs for more information). This defines
|
||||
# how often the scheduler should run (in seconds).
|
||||
scheduler_heartbeat_sec = 5
|
||||
|
||||
# after how much time should the scheduler terminate in seconds
|
||||
# -1 indicates to run continuously (see also num_runs)
|
||||
run_duration = -1
|
||||
|
||||
# The number of times to try to schedule each DAG file
|
||||
# -1 indicates unlimited number
|
||||
num_runs = -1
|
||||
|
||||
# The number of seconds to wait between consecutive DAG file processing
|
||||
processor_poll_interval = 1
|
||||
|
||||
# after how much time (seconds) a new DAGs should be picked up from the filesystem
|
||||
min_file_process_interval = 0
|
||||
|
||||
# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes.
|
||||
dag_dir_list_interval = 300
|
||||
|
||||
# How often should stats be printed to the logs
|
||||
print_stats_interval = 30
|
||||
|
||||
# If the last scheduler heartbeat happened more than scheduler_health_check_threshold ago (in seconds),
|
||||
# scheduler is considered unhealthy.
|
||||
# This is used by the health check in the "/health" endpoint
|
||||
scheduler_health_check_threshold = 30
|
||||
|
||||
child_process_log_directory = /airflow_home/logs/scheduler
|
||||
|
||||
# Local task jobs periodically heartbeat to the DB. If the job has
|
||||
# not heartbeat in this many seconds, the scheduler will mark the
|
||||
# associated task instance as failed and will re-schedule the task.
|
||||
scheduler_zombie_task_threshold = 300
|
||||
|
||||
# Turn off scheduler catchup by setting this to False.
|
||||
# Default behavior is unchanged and
|
||||
# Command Line Backfills still work, but the scheduler
|
||||
# will not do scheduler catchup if this is False,
|
||||
# however it can be set on a per DAG basis in the
|
||||
# DAG definition (catchup)
|
||||
catchup_by_default = True
|
||||
|
||||
# This changes the batch size of queries in the scheduling main loop.
|
||||
# If this is too high, SQL query performance may be impacted by one
|
||||
# or more of the following:
|
||||
# - reversion to full table scan
|
||||
# - complexity of query predicate
|
||||
# - excessive locking
|
||||
#
|
||||
# Additionally, you may hit the maximum allowable query length for your db.
|
||||
#
|
||||
# Set this to 0 for no limit (not advised)
|
||||
max_tis_per_query = 512
|
||||
|
||||
# Statsd (https://github.com/etsy/statsd) integration settings
|
||||
statsd_on = False
|
||||
statsd_host = localhost
|
||||
statsd_port = 8125
|
||||
statsd_prefix = airflow
|
||||
|
||||
# If you want to avoid send all the available metrics to StatsD,
|
||||
# you can configure an allow list of prefixes to send only the metrics that
|
||||
# start with the elements of the list (e.g: scheduler,executor,dagrun)
|
||||
statsd_allow_list =
|
||||
|
||||
# The scheduler can run multiple threads in parallel to schedule dags.
|
||||
# This defines how many threads will run.
|
||||
max_threads = 8
|
||||
|
||||
authenticate = False
|
||||
|
||||
# Turn off scheduler use of cron intervals by setting this to False.
|
||||
# DAGs submitted manually in the web UI or with trigger_dag will still run.
|
||||
use_job_schedule = True
|
||||
|
||||
[ldap]
|
||||
# set this to ldaps://<your.ldap.server>:<port>
|
||||
uri =
|
||||
user_filter = objectClass=*
|
||||
user_name_attr = uid
|
||||
group_member_attr = memberOf
|
||||
superuser_filter =
|
||||
data_profiler_filter =
|
||||
bind_user = cn=Manager,dc=example,dc=com
|
||||
bind_password = insecure
|
||||
basedn = dc=example,dc=com
|
||||
cacert = /etc/ca/ldap_ca.crt
|
||||
search_scope = LEVEL
|
||||
|
||||
# This setting allows the use of LDAP servers that either return a
|
||||
# broken schema, or do not return a schema.
|
||||
ignore_malformed_schema = False
|
||||
|
||||
[mesos]
|
||||
# Mesos master address which MesosExecutor will connect to.
|
||||
master = localhost:5050
|
||||
|
||||
# The framework name which Airflow scheduler will register itself as on mesos
|
||||
framework_name = Airflow
|
||||
|
||||
# Number of cpu cores required for running one task instance using
|
||||
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
|
||||
# command on a mesos slave
|
||||
task_cpu = 1
|
||||
|
||||
# Memory in MB required for running one task instance using
|
||||
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
|
||||
# command on a mesos slave
|
||||
task_memory = 256
|
||||
|
||||
# Enable framework checkpointing for mesos
|
||||
# See http://mesos.apache.org/documentation/latest/slave-recovery/
|
||||
checkpoint = False
|
||||
|
||||
# Failover timeout in milliseconds.
|
||||
# When checkpointing is enabled and this option is set, Mesos waits
|
||||
# until the configured timeout for
|
||||
# the MesosExecutor framework to re-register after a failover. Mesos
|
||||
# shuts down running tasks if the
|
||||
# MesosExecutor framework fails to re-register within this timeframe.
|
||||
# failover_timeout = 604800
|
||||
|
||||
# Enable framework authentication for mesos
|
||||
# See http://mesos.apache.org/documentation/latest/configuration/
|
||||
authenticate = False
|
||||
|
||||
# Mesos credentials, if authentication is enabled
|
||||
# default_principal = admin
|
||||
# default_secret = admin
|
||||
|
||||
# Optional Docker Image to run on slave before running the command
|
||||
# This image should be accessible from mesos slave i.e mesos slave
|
||||
# should be able to pull this docker image before executing the command.
|
||||
# docker_image_slave = puckel/docker-airflow
|
||||
|
||||
[kerberos]
|
||||
ccache = /tmp/airflow_krb5_ccache
|
||||
# gets augmented with fqdn
|
||||
principal = airflow
|
||||
reinit_frequency = 3600
|
||||
kinit_path = kinit
|
||||
keytab = airflow.keytab
|
||||
|
||||
|
||||
[github_enterprise]
|
||||
api_rev = v3
|
||||
|
||||
[admin]
|
||||
# UI to hide sensitive variable fields when set to True
|
||||
hide_sensitive_variable_fields = True
|
||||
|
||||
[elasticsearch]
|
||||
# Elasticsearch host
|
||||
host =
|
||||
# Format of the log_id, which is used to query for a given tasks logs
|
||||
log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
|
||||
# Used to mark the end of a log stream for a task
|
||||
end_of_log_mark = end_of_log
|
||||
# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
|
||||
# Code will construct log_id using the log_id template from the argument above.
|
||||
# NOTE: The code will prefix the https:// automatically, don't include that here.
|
||||
frontend =
|
||||
# Write the task logs to the stdout of the worker, rather than the default files
|
||||
write_stdout = False
|
||||
# Instead of the default log formatter, write the log lines as JSON
|
||||
json_format = False
|
||||
# Log fields to also attach to the json output, if enabled
|
||||
json_fields = asctime, filename, lineno, levelname, message
|
||||
|
||||
[elasticsearch_configs]
|
||||
|
||||
use_ssl = False
|
||||
verify_certs = True
|
||||
|
||||
[kubernetes]
|
||||
# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
|
||||
worker_container_repository =
|
||||
worker_container_tag =
|
||||
worker_container_image_pull_policy = IfNotPresent
|
||||
|
||||
# If True (default), worker pods will be deleted upon termination
|
||||
delete_worker_pods = True
|
||||
|
||||
# Number of Kubernetes Worker Pod creation calls per scheduler loop
|
||||
worker_pods_creation_batch_size = 1
|
||||
|
||||
# The Kubernetes namespace where airflow workers should be created. Defaults to `default`
|
||||
namespace = default
|
||||
|
||||
# The name of the Kubernetes ConfigMap Containing the Airflow Configuration (this file)
|
||||
airflow_configmap =
|
||||
|
||||
# For docker image already contains DAGs, this is set to `True`, and the worker will search for dags in dags_folder,
|
||||
# otherwise use git sync or dags volume claim to mount DAGs
|
||||
dags_in_image = False
|
||||
|
||||
# For either git sync or volume mounted DAGs, the worker will look in this subpath for DAGs
|
||||
dags_volume_subpath =
|
||||
|
||||
# For DAGs mounted via a volume claim (mutually exclusive with git-sync and host path)
|
||||
dags_volume_claim =
|
||||
|
||||
# For volume mounted logs, the worker will look in this subpath for logs
|
||||
logs_volume_subpath =
|
||||
|
||||
# A shared volume claim for the logs
|
||||
logs_volume_claim =
|
||||
|
||||
# For DAGs mounted via a hostPath volume (mutually exclusive with volume claim and git-sync)
|
||||
# Useful in local environment, discouraged in production
|
||||
dags_volume_host =
|
||||
|
||||
# A hostPath volume for the logs
|
||||
# Useful in local environment, discouraged in production
|
||||
logs_volume_host =
|
||||
|
||||
# A list of configMapsRefs to envFrom. If more than one configMap is
|
||||
# specified, provide a comma separated list: configmap_a,configmap_b
|
||||
env_from_configmap_ref =
|
||||
|
||||
# A list of secretRefs to envFrom. If more than one secret is
|
||||
# specified, provide a comma separated list: secret_a,secret_b
|
||||
env_from_secret_ref =
|
||||
|
||||
# Git credentials and repository for DAGs mounted via Git (mutually exclusive with volume claim)
|
||||
git_repo =
|
||||
git_branch =
|
||||
git_subpath =
|
||||
# Use git_user and git_password for user authentication or git_ssh_key_secret_name and git_ssh_key_secret_key
|
||||
# for SSH authentication
|
||||
git_user =
|
||||
git_password =
|
||||
git_sync_root = /git
|
||||
git_sync_dest = repo
|
||||
git_dags_folder_mount_point =
|
||||
|
||||
# To get Git-sync SSH authentication set up follow this format
|
||||
#
|
||||
# airflow-secrets.yaml:
|
||||
# ---
|
||||
# apiVersion: v1
|
||||
# kind: Secret
|
||||
# metadata:
|
||||
# name: airflow-secrets
|
||||
# data:
|
||||
# # key needs to be gitSshKey
|
||||
# gitSshKey: <base64_encoded_data>
|
||||
# ---
|
||||
# airflow-configmap.yaml:
|
||||
# apiVersion: v1
|
||||
# kind: ConfigMap
|
||||
# metadata:
|
||||
# name: airflow-configmap
|
||||
# data:
|
||||
# known_hosts: |
|
||||
# github.com ssh-rsa <...>
|
||||
# airflow.cfg: |
|
||||
# ...
|
||||
#
|
||||
# git_ssh_key_secret_name = airflow-secrets
|
||||
# git_ssh_known_hosts_configmap_name = airflow-configmap
|
||||
git_ssh_key_secret_name =
|
||||
git_ssh_known_hosts_configmap_name =
|
||||
|
||||
# To give the git_sync init container credentials via a secret, create a secret
|
||||
# with two fields: GIT_SYNC_USERNAME and GIT_SYNC_PASSWORD (example below) and
|
||||
# add `git_sync_credentials_secret = <secret_name>` to your airflow config under the kubernetes section
|
||||
#
|
||||
# Secret Example:
|
||||
# apiVersion: v1
|
||||
# kind: Secret
|
||||
# metadata:
|
||||
# name: git-credentials
|
||||
# data:
|
||||
# GIT_SYNC_USERNAME: <base64_encoded_git_username>
|
||||
# GIT_SYNC_PASSWORD: <base64_encoded_git_password>
|
||||
git_sync_credentials_secret =
|
||||
|
||||
# For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync
|
||||
git_sync_container_repository = k8s.gcr.io/git-sync
|
||||
git_sync_container_tag = v3.1.1
|
||||
git_sync_init_container_name = git-sync-clone
|
||||
git_sync_run_as_user = 65533
|
||||
|
||||
# The name of the Kubernetes service account to be associated with airflow workers, if any.
|
||||
# Service accounts are required for workers that require access to secrets or cluster resources.
|
||||
# See the Kubernetes RBAC documentation for more:
|
||||
# https://kubernetes.io/docs/admin/authorization/rbac/
|
||||
worker_service_account_name =
|
||||
|
||||
# Any image pull secrets to be given to worker pods, If more than one secret is
|
||||
# required, provide a comma separated list: secret_a,secret_b
|
||||
image_pull_secrets =
|
||||
|
||||
# GCP Service Account Keys to be provided to tasks run on Kubernetes Executors
|
||||
# Should be supplied in the format: key-name-1:key-path-1,key-name-2:key-path-2
|
||||
gcp_service_account_keys =
|
||||
|
||||
# Use the service account kubernetes gives to pods to connect to kubernetes cluster.
|
||||
# It's intended for clients that expect to be running inside a pod running on kubernetes.
|
||||
# It will raise an exception if called from a process not running in a kubernetes environment.
|
||||
in_cluster = True
|
||||
|
||||
# When running with in_cluster=False change the default cluster_context or config_file
|
||||
# options to Kubernetes client. Leave blank these to use default behaviour like `kubectl` has.
|
||||
# cluster_context =
|
||||
# config_file =
|
||||
|
||||
|
||||
# Affinity configuration as a single line formatted JSON object.
|
||||
# See the affinity model for top-level key names (e.g. `nodeAffinity`, etc.):
|
||||
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#affinity-v1-core
|
||||
affinity =
|
||||
|
||||
# A list of toleration objects as a single line formatted JSON array
|
||||
# See:
|
||||
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#toleration-v1-core
|
||||
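# For instance, a single toleration might be written as (illustrative values only, not a recommendation):
# tolerations = [{"key": "dedicated", "operator": "Equal", "value": "airflow", "effect": "NoSchedule"}]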
tolerations =
|
||||
|
||||
# **kwargs parameters to pass while calling a kubernetes client core_v1_api methods from Kubernetes Executor
|
||||
# provided as a single line formatted JSON dictionary string.
|
||||
# The list of supported params in **kwargs is similar for all core_v1_apis, hence a single config variable for all apis
|
||||
# See:
|
||||
# https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py
|
||||
# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely for kubernetes
|
||||
# api responses, which will cause the scheduler to hang. The timeout is specified as [connect timeout, read timeout]
|
||||
kube_client_request_args = {"_request_timeout" : [60,60] }
|
||||
|
||||
# Worker pods security context options
|
||||
# See:
|
||||
# https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
|
||||
|
||||
# Specifies the uid to run the first process of the worker pods containers as
|
||||
run_as_user =
|
||||
|
||||
# Specifies a gid to associate with all containers in the worker pods
|
||||
# if using a git_ssh_key_secret_name use an fs_group
|
||||
# that allows for the key to be read, e.g. 65533
|
||||
fs_group =
|
||||
|
||||
[kubernetes_node_selectors]
|
||||
# The Key-value pairs to be given to worker pods.
|
||||
# The worker pods will be scheduled to the nodes of the specified key-value pairs.
|
||||
# Should be supplied in the format: key = value
|
||||
|
||||
[kubernetes_annotations]
|
||||
# The key-value annotation pairs to be given to worker pods.
|
||||
# Should be supplied in the format: key = value
|
||||
|
||||
[kubernetes_environment_variables]
|
||||
# The scheduler sets the following environment variables into your workers. You may define as
|
||||
# many environment variables as needed and the kubernetes launcher will set them in the launched workers.
|
||||
# Environment variables in this section are defined as follows
|
||||
# <environment_variable_key> = <environment_variable_value>
|
||||
#
|
||||
# For example if you wanted to set an environment variable with value `prod` and key
|
||||
# `ENVIRONMENT`, you would use the following format:
|
||||
# ENVIRONMENT = prod
|
||||
#
|
||||
# Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
|
||||
# formatting as supported by airflow normally.
|
||||
|
||||
[kubernetes_secrets]
|
||||
# The scheduler mounts the following secrets into your workers as they are launched by the
|
||||
# scheduler. You may define as many secrets as needed and the kubernetes launcher will parse the
|
||||
# defined secrets and mount them as secret environment variables in the launched workers.
|
||||
# Secrets in this section are defined as follows
|
||||
# <environment_variable_mount> = <kubernetes_secret_object>=<kubernetes_secret_key>
|
||||
#
|
||||
# For example if you wanted to mount a kubernetes secret key named `postgres_password` from the
|
||||
# kubernetes secret object `airflow-secret` as the environment variable `POSTGRES_PASSWORD` into
|
||||
# your workers, you would use the following format:
|
||||
# POSTGRES_PASSWORD = airflow-secret=postgres_credentials
|
||||
#
|
||||
# Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
|
||||
# formatting as supported by airflow normally.
|
||||
|
||||
[kubernetes_labels]
|
||||
# The Key-value pairs to be given to worker pods.
|
||||
# The worker pods will be given these static labels, as well as some additional dynamic labels
|
||||
# to identify the task.
|
||||
# Should be supplied in the format: key = value
|
||||
82
tools/python/airmaps/sandbox/airmaps/airmaps.ini
Normal file
82
tools/python/airmaps/sandbox/airmaps/airmaps.ini
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
[Main]
|
||||
# The path where the planet will be downloaded and the maps are generated.
|
||||
MAIN_OUT_PATH: /maps_build
|
||||
# If the DEBUG flag is set, a special small planet file will be downloaded.
|
||||
DEBUG: 1
|
||||
|
||||
|
||||
[Developer]
|
||||
# The path where the generator_tool will be searched for.
|
||||
BUILD_PATH: /omim-build-release
|
||||
# The path to the project directory omim.
|
||||
OMIM_PATH: /omim
|
||||
|
||||
|
||||
[Storage]
|
||||
# WebDAV settings.
|
||||
WD_HOST: webdav
|
||||
WD_LOGIN: alice
|
||||
WD_PASSWORD: secret1234
|
||||
|
||||
|
||||
[Generator tool]
|
||||
# The path to the omim/data.
|
||||
USER_RESOURCE_PATH: ${Developer:OMIM_PATH}/data
|
||||
# Do not change it. This is determined automatically.
|
||||
# NODE_STORAGE: map
|
||||
|
||||
|
||||
[Osm tools]
|
||||
# The path to the osmctools sources.
|
||||
OSM_TOOLS_SRC_PATH: ${Developer:OMIM_PATH}/tools/osmctools
|
||||
# The path where osmctools will be searched for or built.
|
||||
OSM_TOOLS_PATH: /osmctools
|
||||
|
||||
|
||||
[Stages]
|
||||
# Run osmupdate tool for planet.
|
||||
NEED_PLANET_UPDATE: 0
|
||||
|
||||
|
||||
[Logging]
|
||||
# The path where maps_generator log will be saved.
|
||||
# LOG_FILE_PATH: generation.log
|
||||
|
||||
|
||||
[External]
|
||||
# The url to the planet file.
|
||||
# PLANET_URL:
|
||||
# The url to the file with md5 sum of the planet.
|
||||
# PLANET_MD5_URL:
|
||||
# The base url to WorldCoasts.geom and WorldCoasts.rawgeom (without file name).
|
||||
# Files latest_coasts.geom and latest_coasts.rawgeom must be at this URL.
|
||||
# For example, if PLANET_COASTS_URL = https://somesite.com/download/
|
||||
# The https://somesite.com/download/latest_coasts.geom url will be used to download latest_coasts.geom and
|
||||
# the https://somesite.com/download/latest_coasts.rawgeom url will be used to download latest_coasts.rawgeom.
|
||||
# PLANET_COASTS_URL:
|
||||
# The url to the subway file.
|
||||
SUBWAY_URL: https://cdn.organicmaps.app/subway.json
|
||||
|
||||
# Urls for production maps generation.
|
||||
# UGC_URL:
|
||||
# HOTELS_URL:
|
||||
# PROMO_CATALOG_CITIES:
|
||||
# POPULARITY_URL:
|
||||
# FOOD_URL:
|
||||
# FOOD_TRANSLATIONS_URL:
|
||||
# SRTM_PATH:
|
||||
# ISOLINES_PATH:
|
||||
# UK_POSTCODES_URL:
|
||||
# US_POSTCODES_URL:
|
||||
|
||||
|
||||
[Common]
|
||||
# Auto detection.
|
||||
THREADS_COUNT: 0
|
||||
# Emails for mailing.
|
||||
# EMAILS:
|
||||
|
||||
|
||||
[Stats]
|
||||
# Path to rules for calculating statistics by type
|
||||
STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt
|
||||
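The ${Section:KEY} references above use configparser's extended interpolation syntax. A minimal sketch of reading this file that way (the file name and the section/key names are taken from the sample above; whether the map generator loads it with exactly this code is an assumption):

import configparser

# Read airmaps.ini with ${Section:KEY} cross-section interpolation enabled.
config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation())
config.read("airmaps.ini")
print(config["Generator tool"]["USER_RESOURCE_PATH"])  # -> /omim/data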
13
tools/python/airmaps/sandbox/airmaps/run_airmaps_service.sh
Executable file
13
tools/python/airmaps/sandbox/airmaps/run_airmaps_service.sh
Executable file
|
|
@ -0,0 +1,13 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
export PYTHONPATH=/omim/tools/python
|
||||
export AIRFLOW_HOME=/airflow_home
|
||||
|
||||
# Initialize the database.
|
||||
airflow initdb
|
||||
|
||||
# Start the web server on port 8880.
|
||||
airflow webserver -p 8880 &
|
||||
|
||||
# Start the scheduler.
|
||||
airflow scheduler
|
||||
14
tools/python/airmaps/sandbox/build.sh
Executable file
14
tools/python/airmaps/sandbox/build.sh
Executable file
|
|
@ -0,0 +1,14 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
BUILD_PATH="$(dirname "$0")"
|
||||
OMIM_PATH="$(cd "${OMIM_PATH:-${BUILD_PATH}/../../../..}"; pwd)"
|
||||
|
||||
echo "Build airmaps service.."
|
||||
|
||||
mv "${OMIM_PATH}/.dockerignore" "${OMIM_PATH}/.dockerignore_" 2> /dev/null
|
||||
cp "${BUILD_PATH}/.dockerignore" ${OMIM_PATH}
|
||||
|
||||
docker-compose build
|
||||
|
||||
rm "${OMIM_PATH}/.dockerignore"
|
||||
mv "${OMIM_PATH}/.dockerignore_" "${OMIM_PATH}/.dockerignore" 2> /dev/null
|
||||
10
tools/python/airmaps/sandbox/clean.sh
Executable file
10
tools/python/airmaps/sandbox/clean.sh
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
BUILD_PATH="$(dirname "$0")"
|
||||
OMIM_PATH="$(cd "${OMIM_PATH:-${BUILD_PATH}/../../../..}"; pwd)"
|
||||
|
||||
echo "Cleaning.."
|
||||
rm "${OMIM_PATH}/.dockerignore" 2> /dev/null
|
||||
mv "${OMIM_PATH}/.dockerignore_" "${OMIM_PATH}/.dockerignore" 2> /dev/null
|
||||
|
||||
rm -r "${BUILD_PATH}/storage" 2> /dev/null
|
||||
10
tools/python/airmaps/sandbox/create_storage.sh
Executable file
10
tools/python/airmaps/sandbox/create_storage.sh
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
BUILD_PATH="$(dirname "$0")"
|
||||
|
||||
echo "Creating storage.."
|
||||
mkdir -p "${BUILD_PATH}/storage/tests/coasts"
|
||||
mkdir -p "${BUILD_PATH}/storage/tests/maps/open_source"
|
||||
mkdir -p "${BUILD_PATH}/storage/tests/planet_regular"
|
||||
|
||||
chown -R www-data:www-data "${BUILD_PATH}/storage/tests"
|
||||
41
tools/python/airmaps/sandbox/docker-compose.yml
Normal file
41
tools/python/airmaps/sandbox/docker-compose.yml
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
version: "3"
|
||||
services:
|
||||
webdav:
|
||||
image: sashgorokhov/webdav
|
||||
container_name: webdav
|
||||
hostname: webdav
|
||||
restart: always
|
||||
environment:
|
||||
USERNAME: alice
|
||||
PASSWORD: secret1234
|
||||
volumes:
|
||||
- ./storage/tests:/media/tests
|
||||
|
||||
db:
|
||||
image: postgres:12.2
|
||||
container_name: db
|
||||
hostname: db
|
||||
restart: always
|
||||
environment:
|
||||
POSTGRES_DB: airflow
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
|
||||
airmaps:
|
||||
build:
|
||||
context: ../../../..
|
||||
dockerfile: ./tools/python/airmaps/sandbox/airmaps/Dockerfile
|
||||
args:
|
||||
- TZ=Europe/Moscow
|
||||
container_name: airmaps
|
||||
hostname: airmaps
|
||||
restart: always
|
||||
links:
|
||||
- webdav
|
||||
- db
|
||||
ports:
|
||||
- "80:8880"
|
||||
command: ./tools/python/airmaps/sandbox/airmaps/run_airmaps_service.sh
|
||||
volumes:
|
||||
- ./airmaps/airflow.cfg:/airflow_home/airflow.cfg
|
||||
- ./airmaps/airmaps.ini:/omim/tools/python/airmaps/var/etc/airmaps.ini
|
||||
36
tools/python/airmaps/setup.py
Executable file
36
tools/python/airmaps/setup.py
Executable file
|
|
@ -0,0 +1,36 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
|
||||
import setuptools
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
|
||||
|
||||
from pyhelpers.setup import chdir
|
||||
from pyhelpers.setup import get_version
|
||||
from pyhelpers.setup import get_requirements
|
||||
|
||||
|
||||
with chdir(os.path.abspath(os.path.dirname(__file__))):
|
||||
setuptools.setup(
|
||||
name="omim-airmaps",
|
||||
version=str(get_version()),
|
||||
author="CoMaps",
|
||||
author_email="info@comaps.app",
|
||||
description="This package contains tools for generating maps with Apache Airflow.",
|
||||
url="https://codeberg.org/comaps",
|
||||
package_dir={"airmaps": ""},
|
||||
package_data={"": ["var/**/*"]},
|
||||
packages=[
|
||||
"airmaps",
|
||||
"airmaps.dags",
|
||||
"airmaps.instruments",
|
||||
],
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
],
|
||||
python_requires=">=3.6",
|
||||
install_requires=get_requirements(),
|
||||
)
|
||||
82
tools/python/airmaps/var/etc/airmaps.ini.default
Normal file
82
tools/python/airmaps/var/etc/airmaps.ini.default
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
[Main]
|
||||
# The path where the planet will be downloaded and the maps are generated.
|
||||
MAIN_OUT_PATH: ~/maps_build
|
||||
# If the DEBUG flag is set, a special small planet file will be downloaded.
|
||||
DEBUG: 1
|
||||
|
||||
|
||||
[Developer]
|
||||
# The path where the generator_tool will be searched for.
|
||||
BUILD_PATH: ~/omim-build-release
|
||||
# The path to the project directory omim.
|
||||
OMIM_PATH: ~/omim
|
||||
|
||||
|
||||
[Storage]
|
||||
# WebDAV settings.
|
||||
# WD_HOST:
|
||||
# WD_LOGIN:
|
||||
# WD_PASSWORD:
|
||||
|
||||
|
||||
[Generator tool]
|
||||
# The path to the omim/data.
|
||||
USER_RESOURCE_PATH: ${Developer:OMIM_PATH}/data
|
||||
# Do not change it. This is determined automatically.
|
||||
# NODE_STORAGE: map
|
||||
|
||||
|
||||
[Osm tools]
|
||||
# The path to the osmctools sources.
|
||||
OSM_TOOLS_SRC_PATH: ${Developer:OMIM_PATH}/tools/osmctools
|
||||
# The path where osmctools will be searched for or built.
|
||||
OSM_TOOLS_PATH: ~/osmctools
|
||||
|
||||
|
||||
[Stages]
|
||||
# Run osmupdate tool for planet.
|
||||
NEED_PLANET_UPDATE: 0
|
||||
|
||||
|
||||
[Logging]
|
||||
# The path where maps_generator log will be saved.
|
||||
# LOG_FILE_PATH: generation.log
|
||||
|
||||
|
||||
[External]
|
||||
# The url to the planet file.
|
||||
# PLANET_URL:
|
||||
# The url to the file with md5 sum of the planet.
|
||||
# PLANET_MD5_URL:
|
||||
# The base url to WorldCoasts.geom and WorldCoasts.rawgeom (without file name).
|
||||
# Files latest_coasts.geom and latest_coasts.rawgeom must be at this URL.
|
||||
# For example, if PLANET_COASTS_URL = https://somesite.com/download/
|
||||
# The https://somesite.com/download/latest_coasts.geom url will be used to download latest_coasts.geom and
|
||||
# the https://somesite.com/download/latest_coasts.rawgeom url will be used to download latest_coasts.rawgeom.
|
||||
# PLANET_COASTS_URL:
|
||||
# The url to the subway file.
|
||||
SUBWAY_URL: https://cdn.organicmaps.app/subway.json
|
||||
|
||||
# Urls for production maps generation.
|
||||
# UGC_URL:
|
||||
# HOTELS_URL:
|
||||
# PROMO_CATALOG_CITIES:
|
||||
# POPULARITY_URL:
|
||||
# FOOD_URL:
|
||||
# FOOD_TRANSLATIONS_URL:
|
||||
# SRTM_PATH:
|
||||
# ISOLINES_PATH:
|
||||
# UK_POSTCODES_URL:
|
||||
# US_POSTCODES_URL:
|
||||
|
||||
|
||||
[Common]
|
||||
# Auto detection.
|
||||
THREADS_COUNT: 0
|
||||
# Emails for mailing.
|
||||
# EMAILS:
|
||||
|
||||
|
||||
[Stats]
|
||||
# Path to rules for calculating statistics by type
|
||||
STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt
|
||||
86
tools/python/booking_hotels_quality.py
Executable file
86
tools/python/booking_hotels_quality.py
Executable file
|
|
@ -0,0 +1,86 @@
|
|||
#!/usr/bin/env python3
|
||||
# coding: utf8
|
||||
from __future__ import print_function
|
||||
|
||||
from collections import namedtuple, defaultdict
|
||||
from datetime import datetime
|
||||
from sklearn import metrics
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import matplotlib.pyplot as plt
|
||||
import os
|
||||
import pickle
|
||||
import time
|
||||
import re
|
||||
|
||||
# init logging
|
||||
logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')
|
||||
|
||||
|
||||
def load_binary_list(path):
|
||||
"""Loads reference binary classifier output. """
|
||||
bits = []
|
||||
with open(path, 'r') as fd:
|
||||
for line in fd:
|
||||
if (not line.strip()) or line.startswith('#'):
|
||||
continue
|
||||
bits.append(1 if line.startswith('y') else 0)
|
||||
return bits
|
||||
|
||||
|
||||
def load_score_list(path):
|
||||
"""Loads list of matching scores. """
|
||||
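# Each data line is expected to contain a fragment like "result score: 0.87"
# (format inferred from the regex below; the surrounding text is ignored).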
scores = []
|
||||
with open(path, 'r') as fd:
|
||||
for line in fd:
|
||||
if (not line.strip()) or line.startswith('#'):
|
||||
continue
|
||||
scores.append(float(re.search(r'result score: (\d*\.\d+)', line).group(1)))
|
||||
return scores
|
||||
|
||||
|
||||
def process_options():
|
||||
# TODO(mgsergio): Fix description.
|
||||
parser = argparse.ArgumentParser(description="Download and process booking hotels.")
|
||||
parser.add_argument("-v", "--verbose", action="store_true", dest="verbose")
|
||||
parser.add_argument("-q", "--quiet", action="store_false", dest="verbose")
|
||||
|
||||
parser.add_argument("--reference_list", dest="reference_list", help="Path to data files")
|
||||
parser.add_argument("--sample_list", dest="sample_list", help="Name and destination for output file")
|
||||
|
||||
parser.add_argument("--show", dest="show", default=False, action="store_true",
|
||||
help="Show graph for precision and recall")
|
||||
|
||||
options = parser.parse_args()
|
||||
|
||||
if not options.reference_list or not options.sample_list:
|
||||
parser.print_help()
|
||||
exit()
|
||||
|
||||
return options
|
||||
|
||||
|
||||
def main():
|
||||
options = process_options()
|
||||
reference = load_binary_list(options.reference_list)
|
||||
sample = load_score_list(options.sample_list)
|
||||
|
||||
precision, recall, threshold = metrics.precision_recall_curve(reference, sample)
|
||||
aa = zip(precision, recall, threshold)
|
||||
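# p*r/(p+r) is half of the F1 score (the harmonic mean of precision and recall),
# so this selects the threshold that maximizes F1.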
max_by_hmean = max(aa, key=lambda prt: prt[0] * prt[1] / (prt[0] + prt[1]))
|
||||
print("Optimal threshold: {2} for precision: {0} and recall: {1}".format(*max_by_hmean))
|
||||
print("AUC: {0}".format(metrics.roc_auc_score(reference, sample)))
|
||||
|
||||
if options.show:
|
||||
plt.plot(recall, precision)
|
||||
plt.title("Precision/Recall")
|
||||
plt.ylabel("Precision")
|
||||
plt.xlabel("Recall")
|
||||
plt.show()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
114
tools/python/categories/consistency.py
Executable file
114
tools/python/categories/consistency.py
Executable file
|
|
@ -0,0 +1,114 @@
|
|||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
from string import digits
|
||||
|
||||
CONTENT_REGEX = re.compile(r'/\*.*?\*/', re.DOTALL)
|
||||
TYPE_ENTRIES_REGEX = re.compile(r'"(.*?)"\s*=\s*"(.*?)"')
|
||||
SINGLE_REPLACE = False
|
||||
|
||||
def main(lang, data_en):
|
||||
strings_file_path = os.path.join('iphone', 'Maps', 'LocalizedStrings', f'{lang}.lproj', 'LocalizableTypes.strings')
|
||||
json_file_path = os.path.join('data', 'categories-strings', f'{lang}.json', 'localize.json')
|
||||
|
||||
with open(strings_file_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
# Remove comments
|
||||
content = re.sub(CONTENT_REGEX, '', content)
|
||||
|
||||
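# Keys in LocalizableTypes.strings are assumed to start with the "type." prefix; key[5:] strips it.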
type_entries = {key[5:]: value for key, value in re.findall(TYPE_ENTRIES_REGEX, content)}
|
||||
|
||||
with open(json_file_path, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
for type_name, localized_value in type_entries.items():
|
||||
key_matched = False
|
||||
for json_key in data.keys():
|
||||
json_key_split = json_key.split('|')
|
||||
for key in json_key_split:
|
||||
already_there = False
|
||||
_key_matched = False
|
||||
|
||||
if type_name == key.replace('-', '.').replace('_', '.'):
|
||||
key_matched = True
|
||||
data_split = data[json_key].split('|')
|
||||
|
||||
try:
|
||||
data_split.extend([
|
||||
value
|
||||
for category in
|
||||
[a for a in json_key_split
|
||||
if a.startswith('@')]
|
||||
for value in
|
||||
data[category].split('|')
|
||||
])
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
for value in data_split:
|
||||
if value and value[0] in digits:
|
||||
value = value[1:]
|
||||
|
||||
value = value.lower()
|
||||
localized_value_lower = localized_value.lower()
|
||||
|
||||
# Prevents adding duplicates that differ only by the word "shop"
|
||||
if value in localized_value_lower:
|
||||
already_there = True
|
||||
break
|
||||
|
||||
if localized_value_lower == value:
|
||||
_key_matched = True
|
||||
break
|
||||
|
||||
if already_there:
|
||||
break
|
||||
|
||||
if not _key_matched:
|
||||
if SINGLE_REPLACE and len(data_split) == 1:
|
||||
print(f'Replaced "{data[json_key]}" with "{localized_value}" in "{json_key}"')
|
||||
data[json_key] = localized_value
|
||||
|
||||
else:
|
||||
data[json_key] = localized_value+'|'+data[json_key]
|
||||
print(f'Appended "{localized_value}" to "{json_key}"')
|
||||
|
||||
if not key_matched:
|
||||
for json_key in data.keys():
|
||||
for key in json_key.split('|'):
|
||||
if type_name == key.replace('-', '.').replace('_', '.'):
|
||||
print(f'Created "{localized_value}" for "{json_key}"')
|
||||
data.update({json_key: localized_value})
|
||||
|
||||
res = json.dumps(data, ensure_ascii=False, separators=(",\n", ": ")
|
||||
).replace('{', '{\n').replace('}', '\n}')
|
||||
|
||||
with open(json_file_path, 'w', encoding='utf-8') as f:
|
||||
f.write(res)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) < 2:
|
||||
print(f"Usage: {sys.argv[0]} [-r] <language_codes>")
|
||||
sys.exit(1)
|
||||
|
||||
if sys.argv[1] == '-r':
|
||||
SINGLE_REPLACE = True
|
||||
del sys.argv[1]
|
||||
if len(sys.argv) < 2:
|
||||
print("No languages specified")
|
||||
sys.exit(1)
|
||||
|
||||
with open('data/categories-strings/en.json/localize.json', 'r', encoding='utf-8') as f:
|
||||
data_en = json.load(f)
|
||||
|
||||
if len(sys.argv) > 2:
|
||||
for lang in sys.argv[1:]:
|
||||
print(f'{lang}:')
|
||||
main(lang, data_en)
|
||||
print('\n')
|
||||
else:
|
||||
main(sys.argv[1], data_en)
|
||||
77
tools/python/categories/json_to_txt.py
Executable file
77
tools/python/categories/json_to_txt.py
Executable file
|
|
@ -0,0 +1,77 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import json
|
||||
import sys
|
||||
|
||||
LANGUAGES = (
|
||||
'af', 'ar', 'be', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'en-AU',
|
||||
'en-GB', 'en-US', 'es', 'es-MX', 'et', 'eu', 'fa', 'fi', 'fr', 'fr-CA',
|
||||
'he', 'hi', 'hu', 'id', 'it', 'ja', 'ko', 'lt', 'lv', 'mr', 'nb', 'nl',
|
||||
'pl', 'pt', 'pt-BR', 'ro', 'ru', 'sk', 'sr', 'sv', 'sw', 'th', 'tr', 'uk',
|
||||
'vi', 'zh-Hans', 'zh-Hant'
|
||||
)
|
||||
|
||||
|
||||
def load_localize_json(lang_dir):
|
||||
file_path = os.path.join(lang_dir, 'localize.json')
|
||||
if not os.path.isfile(file_path):
|
||||
return {}
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error decoding JSON from {file_path}: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
def collect_all_keys(base_dir):
|
||||
all_data = {}
|
||||
lang_dirs = [d for d in os.listdir(base_dir) if d.endswith('.json')]
|
||||
|
||||
for lang_dir in lang_dirs:
|
||||
lang = lang_dir.replace('.json', '')
|
||||
if lang not in LANGUAGES:
|
||||
print(f"Skipping unsupported language directory: {lang_dir}")
|
||||
continue
|
||||
full_path = os.path.join(base_dir, lang_dir)
|
||||
if os.path.isdir(full_path):
|
||||
data = load_localize_json(full_path)
|
||||
for key, value in data.items():
|
||||
if key not in all_data:
|
||||
all_data[key] = {}
|
||||
all_data[key][lang] = value
|
||||
|
||||
return all_data
|
||||
|
||||
|
||||
def write_category_file(all_data, output_file):
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
for i, (key, translations) in enumerate(all_data.items()):
|
||||
f.write(key + '\n')
|
||||
for lang in LANGUAGES:
|
||||
if lang in translations and translations[lang]:
|
||||
f.write(f"{lang}:{translations[lang]}\n")
|
||||
elif lang == 'en' and key in translations:
|
||||
f.write('\n')
|
||||
if i < len(all_data) - 1:
|
||||
f.write('\n')
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 2:
|
||||
print(f"Usage: {sys.argv[0]} <json_directory> [categories.txt]")
|
||||
sys.exit(1)
|
||||
|
||||
base_dir = sys.argv[1]
|
||||
output_file = sys.argv[2] if len(sys.argv) > 2 else "categories.txt"
|
||||
|
||||
if not os.path.isdir(base_dir):
|
||||
print(f"Directory not found: {base_dir}")
|
||||
sys.exit(1)
|
||||
|
||||
all_data = collect_all_keys(base_dir)
|
||||
write_category_file(all_data, output_file)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
83
tools/python/categories/txt_to_json.py
Executable file
83
tools/python/categories/txt_to_json.py
Executable file
|
|
@ -0,0 +1,83 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import json
|
||||
import sys
|
||||
|
||||
LANGUAGES = (
|
||||
'af', 'ar', 'be', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'en-AU',
|
||||
'en-GB', 'en-US', 'es', 'es-MX', 'et', 'eu', 'fa', 'fi', 'fr', 'fr-CA',
|
||||
'he', 'hi', 'hu', 'id', 'it', 'ja', 'ko', 'lt', 'lv', 'mr', 'nb', 'nl',
|
||||
'pl', 'pt', 'pt-BR', 'ro', 'ru', 'sk', 'sr', 'sv', 'sw', 'th', 'tr', 'uk',
|
||||
'vi', 'zh-Hans', 'zh-Hant'
|
||||
)
|
||||
|
||||
# TODO: respect the order of key/values in the JSON when converting back and forth
|
||||
|
||||
def parse_translations(input_file):
|
||||
"""
|
||||
Parses a translation file and generates a JSON file per language.
|
||||
"""
|
||||
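# A hypothetical input block, purely for illustration (key and translations are made up):
#
#   amenity-cafe
#   en:Cafe
#   de:Café
#
# Blocks are separated by blank lines; lines starting with '#' are skipped.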
# Read the input file line by line
|
||||
with open(input_file, 'r', encoding='utf-8') as f:
|
||||
lines = [line.rstrip('\n') for line in f]
|
||||
|
||||
# Split the file into blocks separated by empty lines
|
||||
blocks = []
|
||||
current_block = []
|
||||
for line in lines:
|
||||
stripped_line = line.strip()
|
||||
if stripped_line.startswith('#'):
|
||||
continue
|
||||
if not stripped_line:
|
||||
if current_block:
|
||||
blocks.append(current_block)
|
||||
current_block = []
|
||||
else:
|
||||
current_block.append(line)
|
||||
if current_block:
|
||||
blocks.append(current_block)
|
||||
|
||||
# Initialize dictionaries for each language
|
||||
lang_data = {lang: {} for lang in LANGUAGES}
|
||||
|
||||
# Process each block
|
||||
for block in blocks:
|
||||
key_line = block[0]
|
||||
has_translation = False
|
||||
for line in block[1:]:
|
||||
if ':' not in line:
|
||||
print(f"Skipping invalid line: {line}")
|
||||
continue
|
||||
lang, translation = line.split(':', 1)
|
||||
lang = lang.strip()
|
||||
translation = translation.strip()
|
||||
if lang in LANGUAGES:
|
||||
lang_data[lang][key_line] = translation
|
||||
has_translation = True
|
||||
else:
|
||||
print(f"Warning: Unsupported language {lang} in line: {line}")
|
||||
|
||||
if not has_translation:
|
||||
lang_data['en'][key_line] = ""
|
||||
|
||||
# Write JSON files
|
||||
for lang, data in lang_data.items():
|
||||
if not data:
|
||||
continue
|
||||
dir_name = f"{lang}.json"
|
||||
os.makedirs(dir_name, exist_ok=True)
|
||||
file_path = os.path.join(dir_name, 'localize.json')
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
f.write(json.dumps(data, ensure_ascii=False, separators=(",\n", ": ")).replace('{', '{\n').replace('}', '\n}'))
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 2:
|
||||
print(f"Usage: {sys.argv[0]} <categories.txt>")
|
||||
sys.exit(1)
|
||||
input_file = sys.argv[1]
|
||||
parse_translations(input_file)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
129
tools/python/categories_converter.py
Executable file
129
tools/python/categories_converter.py
Executable file
|
|
@ -0,0 +1,129 @@
|
|||
#!/usr/bin/env python3
|
||||
#coding: utf8
|
||||
from __future__ import print_function
|
||||
|
||||
from argparse import ArgumentParser
|
||||
from collections import defaultdict
|
||||
from find_untranslated_strings import ITUNES_LANGS
|
||||
|
||||
|
||||
class CategoriesConverter:
|
||||
def __init__(self):
|
||||
args = self.parse_args()
|
||||
self.categories = CategoriesTxt(args.categories)
|
||||
self.should_format = args.format
|
||||
self.output = args.output
|
||||
|
||||
|
||||
def process(self):
|
||||
if self.should_format:
|
||||
self.categories.write_formatted()
|
||||
else:
|
||||
self.categories.write_as_strings(self.output)
|
||||
|
||||
|
||||
def parse_args(self):
|
||||
parser = ArgumentParser(
|
||||
description="""
|
||||
A script for converting categories.txt into the strings.txt format
|
||||
and back, as well as for autoformatting categories.txt. This is
|
||||
useful for interacting with the translation partners.
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-c", "--categories",
|
||||
required=True,
|
||||
dest="categories",
|
||||
help="""Path to the categories file to be converted into the strings.txt format."""
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-o", "--output",
|
||||
dest="output",
|
||||
help="""The destination file."""
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-f", "--format",
|
||||
dest="format", action="store_true", default=False,
|
||||
help="""Format the file and exit"""
|
||||
)
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
class CategoriesTxt:
|
||||
"""For now, let's allow comments only at the beginning of the file."""
|
||||
def __init__(self, filepath):
|
||||
self.translations = defaultdict(lambda: defaultdict(str))
|
||||
self.keys_in_order = []
|
||||
self.comments = []
|
||||
self.filepath = filepath
|
||||
self.all_langs = set()
|
||||
self.parse_file()
|
||||
|
||||
|
||||
def parse_file(self):
|
||||
current_key = ""
|
||||
this_line_is_key = True
|
||||
with open(self.filepath) as infile:
|
||||
for line in map(str.strip, infile):
|
||||
if line.startswith("#"):
|
||||
self.comments.append(line)
|
||||
this_line_is_key = True
|
||||
elif not line:
|
||||
this_line_is_key = True
|
||||
elif this_line_is_key:
|
||||
self.keys_in_order.append(line)
|
||||
current_key = line
|
||||
this_line_is_key = False
|
||||
else:
|
||||
pos = line.index(':')
|
||||
lang = line[:pos]
|
||||
translation = line[pos + 1:]
|
||||
self.translations[current_key][lang] = translation
|
||||
|
||||
|
||||
def write_as_categories(self, outfile):
|
||||
self.write_strings_formatted(outfile, "\n{}\n", "{}:{}\n")
|
||||
|
||||
|
||||
def write_as_strings(self, filepath):
|
||||
with open(filepath, "w") as outfile:
|
||||
self.write_strings_formatted(outfile, key_format="\n [{}]\n", line_format=" {} = {}\n")
|
||||
|
||||
|
||||
def write_strings_formatted(self, outfile, key_format, line_format):
|
||||
for key in self.keys_in_order:
|
||||
outfile.write(key_format.format(key.strip("[]")))
|
||||
pair = self.translations[key]
|
||||
for lang in ITUNES_LANGS:
|
||||
if lang in pair:
|
||||
outfile.write(line_format.format(lang, pair[lang]))
|
||||
remaining_langs = sorted(list(set(pair.keys()) - set(ITUNES_LANGS)))
|
||||
for lang in remaining_langs:
|
||||
outfile.write(line_format.format(lang, pair[lang]))
|
||||
|
||||
|
||||
def add_translation(self, translation, key, lang):
|
||||
if key not in self.keys_in_order:
|
||||
self.keys_in_order.append(key)
|
||||
self.translations[key][lang] = translation
|
||||
|
||||
|
||||
def append_to_translation(self, translation, key, lang):
|
||||
self.translations[key][lang] += translation
|
||||
|
||||
|
||||
def write_formatted(self):
|
||||
with open(self.filepath, "w") as outfile:
|
||||
for comment in self.comments:
|
||||
outfile.write(comment + "\n")
|
||||
|
||||
self.write_as_categories(outfile)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
c = CategoriesConverter()
|
||||
c.process()
|
||||
50
tools/python/categories_merge_language.py
Executable file
50
tools/python/categories_merge_language.py
Executable file
|
|
@ -0,0 +1,50 @@
|
|||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import os
|
||||
|
||||
path = os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'data', 'categories.txt')
|
||||
if len(sys.argv) < 2:
|
||||
print('Merges some language in categories.txt with English')
|
||||
print('Usage: {} <lang> [path_to_categories.txt]'.format(sys.argv[0]))
|
||||
print('Default path to categories: {}'.format(path))
|
||||
if not os.path.exists(path):
|
||||
print('Warning: default path to categories.txt will fail')
|
||||
sys.exit(1)
|
||||
|
||||
lang = sys.argv[1]
|
||||
if len(sys.argv) > 2:
|
||||
path = sys.argv[2]
|
||||
|
||||
with open(path, 'r') as f:
|
||||
langs = []
|
||||
trans = None
|
||||
|
||||
def flush_langs():
|
||||
for lang in langs:
|
||||
if trans and lang[0] == 'en':
|
||||
parts = lang[1].split('|')
|
||||
parts[0] = '{} - {}'.format(parts[0], trans)
|
||||
lang[1] = '|'.join(parts)
|
||||
print(':'.join(lang))
|
||||
|
||||
for line in map(str.strip, f):
|
||||
if len(line) == 0 or line[0] == '#':
|
||||
if langs:
|
||||
flush_langs()
|
||||
langs = []
|
||||
trans = None
|
||||
print(line)
|
||||
elif not langs:
|
||||
print(line)
|
||||
else:
|
||||
if ':' not in line:
|
||||
raise Exception('Line {} is not a translation line'.format(line))
|
||||
l = line.split(':')
|
||||
langs.append(l)
|
||||
if l[0] == lang:
|
||||
trans = l[1].split('|')[0]
|
||||
if trans[0].isdigit():
|
||||
trans = trans[1:]
|
||||
if trans[0] == '^':
|
||||
trans = trans[1:]
|
||||
flush_langs()
|
||||
232
tools/python/check_store_metadata.py
Executable file
232
tools/python/check_store_metadata.py
Executable file
|
|
@ -0,0 +1,232 @@
|
|||
#!/usr/bin/env python3
|
||||
#
|
||||
# Check AppStore / GooglePlay / F-Droid metadata
|
||||
#
|
||||
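# Usage: check_store_metadata.py gplay|fdroid|ios (see the __main__ block below)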
|
||||
import os
|
||||
import sys
|
||||
import glob
|
||||
import shutil
|
||||
from urllib.parse import urlparse
|
||||
|
||||
os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", ".."))
|
||||
|
||||
# https://support.google.com/googleplay/android-developer/answer/9844778?visit_id=637740303439369859-3116807078&rd=1#zippy=%2Cview-list-of-available-languages
|
||||
GPLAY_LOCALES = [
|
||||
"af", # Afrikaans
|
||||
"sq", # Albanian
|
||||
"am", # Amharic
|
||||
"ar", # Arabic
|
||||
"hy-AM", # Armenian
|
||||
"az-AZ", # Azerbaijani
|
||||
"bn-BD", # Bangla
|
||||
"eu-ES", # Basque
|
||||
"be", # Belarusian
|
||||
"bg", # Bulgarian
|
||||
"my-MM", # Burmese
|
||||
"ca", # Catalan
|
||||
"zh-HK", # Chinese (Hong Kong)
|
||||
"zh-CN", # Chinese (Simplified)
|
||||
"zh-TW", # Chinese (Traditional)
|
||||
"hr", # Croatian
|
||||
"cs-CZ", # Czech
|
||||
"da-DK", # Danish
|
||||
"nl-NL", # Dutch
|
||||
"en-IN", # English
|
||||
"en-SG", # English
|
||||
"en-ZA", # English
|
||||
"en-AU", # English (Australia)
|
||||
"en-CA", # English (Canada)
|
||||
"en-GB", # English (United Kingdom)
|
||||
"en-US", # English (United States)
|
||||
"et", # Estonian
|
||||
"fil", # Filipino
|
||||
"fi-FI", # Finnish
|
||||
"fr-CA", # French (Canada)
|
||||
"fr-FR", # French (France)
|
||||
"gl-ES", # Galician
|
||||
"ka-GE", # Georgian
|
||||
"de-DE", # German
|
||||
"el-GR", # Greek
|
||||
"gu", # Gujarati
|
||||
"iw-IL", # Hebrew
|
||||
"hi-IN", # Hindi
|
||||
"hu-HU", # Hungarian
|
||||
"is-IS", # Icelandic
|
||||
"id", # Indonesian
|
||||
"it-IT", # Italian
|
||||
"ja-JP", # Japanese
|
||||
"kn-IN", # Kannada
|
||||
"kk", # Kazakh
|
||||
"km-KH", # Khmer
|
||||
"ko-KR", # Korean
|
||||
"ky-KG", # Kyrgyz
|
||||
"lo-LA", # Lao
|
||||
"lv", # Latvian
|
||||
"lt", # Lithuanian
|
||||
"mk-MK", # Macedonian
|
||||
"ms", # Malay
|
||||
"ms-MY", # Malay (Malaysia)
|
||||
"ml-IN", # Malayalam
|
||||
"mr-IN", # Marathi
|
||||
"mn-MN", # Mongolian
|
||||
"ne-NP", # Nepali
|
||||
"no-NO", # Norwegian
|
||||
"fa", # Persian
|
||||
"fa-AE", # Persian
|
||||
"fa-AF", # Persian
|
||||
"fa-IR", # Persian
|
||||
"pl-PL", # Polish
|
||||
"pt-BR", # Portuguese (Brazil)
|
||||
"pt-PT", # Portuguese (Portugal)
|
||||
"pa", # Punjabi
|
||||
"ro", # Romanian
|
||||
"rm", # Romansh
|
||||
"ru-RU", # Russian
|
||||
"sr", # Serbian
|
||||
"si-LK", # Sinhala
|
||||
"sk", # Slovak
|
||||
"sl", # Slovenian
|
||||
"es-419", # Spanish (Latin America)
|
||||
"es-ES", # Spanish (Spain)
|
||||
"es-US", # Spanish (United States)
|
||||
"sw", # Swahili
|
||||
"sv-SE", # Swedish
|
||||
"ta-IN", # Tamil
|
||||
"te-IN", # Telugu
|
||||
"th", # Thai
|
||||
"tr-TR", # Turkish
|
||||
"uk", # Ukrainian
|
||||
"ur", # Urdu
|
||||
"vi", # Vietnamese
|
||||
"zu", # Zulu
|
||||
]
|
||||
|
||||
# From a Fastlane error message and https://help.apple.com/app-store-connect/#/dev997f9cf7c
|
||||
APPSTORE_LOCALES = [
|
||||
"ar-SA", "ca", "cs", "da", "de-DE", "el", "en-AU", "en-CA", "en-GB", "en-US", "es-ES", "es-MX", "fi", "fr-CA", "fr-FR", "he", "hi", "hr", "hu", "id", "it", "ja", "ko", "ms", "nl-NL", "no", "pl", "pt-BR", "pt-PT", "ro", "ru", "sk", "sv", "th", "tr", "uk", "vi", "zh-Hans", "zh-Hant"
|
||||
]
|
||||
|
||||
def error(path, message, *args, **kwargs):
|
||||
print("❌", path + ":", message.format(*args, **kwargs), file=sys.stderr)
|
||||
return False
|
||||
|
||||
|
||||
def done(path, ok):
|
||||
if ok:
|
||||
print("✅", path)
|
||||
return ok
|
||||
|
||||
def check_raw(path, max_length, ignoreEmptyFilesAndNewLines=False):
|
||||
ok = True
|
||||
with open(path, 'r') as f:
|
||||
text = f.read()
|
||||
if not ignoreEmptyFilesAndNewLines:
|
||||
if not text:
|
||||
ok = error(path, "empty")
|
||||
elif text[-1] == os.linesep:
|
||||
text = text[:-1]
|
||||
else:
|
||||
ok = error(path, "missing new line")
|
||||
else:
|
||||
text = text.strip()
|
||||
|
||||
cur_length = len(text)
|
||||
if cur_length > max_length:
|
||||
ok = error(path, "too long: got={}, expected={}", cur_length, max_length)
|
||||
return ok, text
|
||||
|
||||
def check_text(path, max, optional=False, ignoreEmptyFilesAndNewLines=False):
|
||||
try:
|
||||
return done(path, check_raw(path, max, ignoreEmptyFilesAndNewLines)[0])
|
||||
except FileNotFoundError as e:
|
||||
if optional:
|
||||
return True
|
||||
print("🚫", path)
|
||||
return False
|
||||
|
||||
def check_url(path, ignoreEmptyFilesAndNewLines=False):
|
||||
(ok, url) = check_raw(path, 500, ignoreEmptyFilesAndNewLines)
|
||||
url = urlparse(url)
|
||||
if not url.scheme in ('https', 'http'):
|
||||
ok = error(path, "invalid URL: {}", url)
|
||||
return done(path, ok)
|
||||
|
||||
def check_email(path):
|
||||
(ok, email) = check_raw(path, 500)
|
||||
ok = ok and email.find('@') != -1 and email.find('.') != -1
|
||||
return done(path, ok)
|
||||
|
||||
def check_exact(path, expected):
|
||||
(ok, value) = check_raw(path, len(expected))
|
||||
if value != expected:
|
||||
ok = error(path, "invalid value: got={}, expected={}", value, expected)
|
||||
return done(path, ok)
|
||||
|
||||
|
||||
def check_android(is_gplay):
|
||||
ok = True
|
||||
flavor = "google" if is_gplay else "fdroid"
|
||||
flavor = f'android/app/src/{flavor}/play/'
|
||||
ok = check_url(flavor + 'contact-website.txt') and ok
|
||||
ok = check_email(flavor + 'contact-email.txt') and ok
|
||||
ok = check_exact(flavor + 'default-language.txt', 'en-US') and ok
|
||||
for locale in glob.glob(flavor + 'listings/*/'):
|
||||
if is_gplay and locale.split('/')[-2] not in GPLAY_LOCALES:
|
||||
ok = error(locale, 'unsupported locale') and ok
|
||||
continue
|
||||
ok = check_text(locale + 'title.txt', 30 if is_gplay else 50) and ok
|
||||
ok = check_text(locale + 'short-description.txt', 80) and ok
|
||||
ok = check_text(locale + 'full-description.txt', 4000) and ok
|
||||
ok = check_text(locale + 'release-notes.txt', 499, optional=True) and ok
|
||||
''' TODO: release notes do not necessarily exist for all languages, but symlinks are made for all
|
||||
for locale in glob.glob(flavor + 'release-notes/*/'):
|
||||
if locale.split('/')[-2] not in GPLAY_LOCALES:
|
||||
ok = error(locale, 'unsupported locale') and ok
|
||||
continue
|
||||
ok = check_text(locale + 'default.txt', 499) and ok
|
||||
'''
|
||||
return ok
|
||||
|
||||
|
||||
def check_ios():
|
||||
ok = True
|
||||
for locale in glob.glob('iphone/metadata/*/'):
|
||||
if locale.split('/')[-2] not in APPSTORE_LOCALES:
|
||||
ok = error(locale, "unsupported locale") and ok
|
||||
continue
|
||||
|
||||
locale_complete = True
|
||||
for name in ["description.txt", "keywords.txt", "marketing_url.txt", "privacy_url.txt", "subtitle.txt", "support_url.txt"]:
|
||||
name_path = os.path.join(locale, name)
|
||||
if not os.path.exists(name_path):
|
||||
locale_complete = False
|
||||
|
||||
if locale_complete:
|
||||
ok = check_text(locale + "subtitle.txt", 30, False, True) and ok
|
||||
ok = check_text(locale + "description.txt", 4000, False, True) and ok
|
||||
ok = check_text(locale + "release_notes.txt", 4000, True, True) and ok
|
||||
ok = check_text(locale + "keywords.txt", 100, False, True) and ok
|
||||
ok = check_url(locale + "support_url.txt", True) and ok
|
||||
ok = check_url(locale + "marketing_url.txt", True) and ok
|
||||
ok = check_url(locale + "privacy_url.txt", True) and ok
|
||||
|
||||
return ok
|
||||
|
||||
if __name__ == "__main__":
|
||||
ok = True
|
||||
if len(sys.argv) == 2 and sys.argv[1] == 'gplay':
|
||||
if check_android(is_gplay=True):
|
||||
sys.exit(0)
|
||||
sys.exit(2)
|
||||
if len(sys.argv) == 2 and sys.argv[1] == 'fdroid':
|
||||
if check_android(is_gplay=False):
|
||||
sys.exit(0)
|
||||
sys.exit(2)
|
||||
elif len(sys.argv) == 2 and sys.argv[1] == "ios":
|
||||
if check_ios():
|
||||
sys.exit(0)
|
||||
sys.exit(2)
|
||||
else:
|
||||
print("Usage:", sys.argv[0], "gplay|fdroid|ios", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
82
tools/python/city_popul_sqr.data
Normal file
82
tools/python/city_popul_sqr.data
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
# City* | Population* | Region* | Width km* | Height km* | Square km2* | AVG | MAX | SQRT(S)
|
||||
# Columns with * symbol are required
|
||||
New York | 8405837 | USA | 56.5 | 50.2 | 1214 | 53.35 | 56.5 | 34.84250278
|
||||
Los Angeles | 3792621 | USA | 51 | 80.5 | 1301.97 | 65.75 | 80.5 | 36.0828214
|
||||
Chicago | 2714856 | USA | 20.9 | 44.6 | 606.1 | 32.75 | 44.6 | 24.61909828
|
||||
Philadelphia | 1447395 | USA | 30.3 | 41.8 | 369.9 | 36.05 | 41.8 | 19.23278451
|
||||
Dallas | 1223229 | USA | 38.6 | 50.7 | 997.1 | 44.65 | 50.7 | 31.57689028
|
||||
San Francisco | 815358 | USA | 12 | 12.7 | 600.6 | 12.35 | 12.7 | 24.50714182
|
||||
Detroit | 701475 | USA | 33.5 | 31.9 | 370.2 | 32.7 | 33.5 | 19.24058211
|
||||
Memphis | 677272 | USA | 38 | 32.2 | 763.4 | 35.1 | 38 | 27.62969417
|
||||
Seattle | 634535 | USA | 11.9 | 27.4 | 369.2 | 19.65 | 27.4 | 19.2145778
|
||||
Boston | 625087 | USA | 15.1 | 18.2 | 232.1 | 16.65 | 18.2 | 15.23482852
|
||||
Las Vegas | 596424 | USA | 43.1 | 31.9 | 340 | 37.5 | 43.1 | 18.43908891
|
||||
Atlanta | 443775 | USA | 26.6 | 29.3 | 343 | 27.95 | 29.3 | 18.52025918
|
||||
Miami | 433146 | USA | 13.7 | 15.8 | 92.42 | 14.75 | 15.8 | 9.613532129
|
||||
New Orleans | 343829 | USA | 46 | 15.1 | 907 | 30.55 | 46 | 30.11644069
|
||||
Pittsburgh | 312819 | USA | 23.3 | 26.9 | 151.1 | 25.1 | 26.9 | 12.292274
|
||||
Orlando | 238300 | USA | 24.8 | 22.2 | 286.7 | 23.5 | 24.8 | 16.93221781
|
||||
Des Moines | 203433 | USA | 18.2 | 17.2 | 213.9 | 17.7 | 18.2 | 14.62532051
|
||||
Salt Lake City | 181698 | USA | 27.8 | 12.1 | 285.9 | 19.95 | 27.8 | 16.9085777
|
||||
Aurora | 7508 | USA | 5 | 4.8 | 15.49 | 4.9 | 5 | 3.935733731
|
||||
Barthsville | 36245 | USA | 9.3 | 11.9 | 58.9 | 10.6 | 11.9 | 7.674633542
|
||||
Farmington | 45854 | USA | 10 | 12.9 | 69.93 | 11.45 | 12.9 | 8.362415919
|
||||
Dumas | 14989 | USA | 3.2 | 8.2 | 14.29 | 5.7 | 8.2 | 3.780211634
|
||||
Grand Island | 49989 | USA | 10.1 | 8.4 | 73.94 | 9.25 | 10.1 | 8.598837131
|
||||
Hastings | 25058 | USA | 8.9 | 6.3 | 35.38 | 7.6 | 8.9 | 5.948108943
|
||||
Decatur | 9362 | USA | 7.4 | 4.8 | 15 | 6.1 | 7.4 | 3.872983346
|
||||
Van Wert | 10844 | USA | 4.5 | 5.3 | 19.71 | 4.9 | 5.3 | 4.439594576
|
||||
Ottawa | 4417 | USA | 8 | 4.7 | 12.35 | 6.35 | 8 | 3.514256678
|
||||
Blythe | 20590 | USA | 9.5 | 22.9 | 69.86 | 16.2 | 22.9 | 8.358229478
|
||||
Morgan Hill | 39420 | USA | 11.3 | 13.9 | 33.36 | 12.6 | 13.9 | 5.775811631
|
||||
London | 8308369 | Europe | 58.9 | 49.9 | 1572 | 54.4 | 58.9 | 39.6484552
|
||||
Athens | 3074160 | Europe | 9.9 | 10.7 | 412 | 10.3 | 10.7 | 20.29778313
|
||||
Berlin | 3397469 | Europe | 40.7 | 37.8 | 891.85 | 39.25 | 40.7 | 29.86385775
|
||||
Madrid | 3215633 | Europe | 32 | 25.1 | 605.77 | 28.55 | 32 | 24.61239525
|
||||
Rome | 2863322 | Europe | 28.2 | 31.9 | 1285.31 | 30.05 | 31.9 | 35.85122034
|
||||
Paris | 10413386 | Europe | 10.9 | 10.6 | 2844.80 | 10.75 | 10.9 | 53.33666656
|
||||
Bucharest | 1883425 | Europe | 26.1 | 27 | 228 | 26.55 | 27 | 15.09966887
|
||||
Hamburg | 1751775 | Europe | 29.3 | 33.4 | 755 | 31.35 | 33.4 | 27.47726333
|
||||
Vienna | 1765649 | Europe | 30.1 | 15.2 | 414.65 | 22.65 | 30.1 | 20.36295656
|
||||
Warsaw | 1715517 | Europe | 28.1 | 31.2 | 517.24 | 29.65 | 31.2 | 22.74291098
|
||||
Barcelona | 1620943 | Europe | 17.7 | 18.5 | 101.9 | 18.1 | 18.5 | 10.09455299
|
||||
Munich | 1388308 | Europe | 30.2 | 20.3 | 310.43 | 25.25 | 30.2 | 17.61902381
|
||||
Milan | 1353882 | Europe | 14.5 | 26 | 181.76 | 20.25 | 26 | 13.48183964
|
||||
Prague | 1243201 | Europe | 34 | 24.5 | 496 | 29.25 | 34 | 22.27105745
|
||||
Brussels | 1138854 | Europe | 16.1 | 16.1 | 161.38 | 16.1 | 16.1 | 12.70354281
|
||||
Birmingham | 1085400 | Europe | 14.8 | 14.8 | 103.39 | 14.8 | 14.8 | 10.16808733
|
||||
Rennes | 208033 | Europe | 8 | 7.5 | 50.39 | 7.75 | 8 | 7.09859141
|
||||
Lisboa | 547631 | Europe | 8.4 | 7.8 | 958 | 8.1 | 8.4 | 30.95157508
|
||||
Sevilla | 703021 | Europe | 18.4 | 12.4 | 140 | 15.4 | 18.4 | 11.83215957
|
||||
Malaga | 568507 | Europe | 14 | 9.4 | 395 | 11.7 | 14 | 19.87460691
|
||||
Genova | 604848 | Europe | 29.9 | 9.5 | 243.6 | 19.7 | 29.9 | 15.60769041
|
||||
Parma | 187214 | Europe | 8.9 | 7.9 | 260.77 | 8.4 | 8.9 | 16.14837453
|
||||
Bologna | 384038 | Europe | 16.5 | 13.8 | 140.7 | 15.15 | 16.5 | 11.86170308
|
||||
Erlangen | 105412 | Europe | 11.6 | 12.4 | 76.9 | 12 | 12.4 | 8.769264507
|
||||
Kaltenkirchen | 19904 | Europe | 8.7 | 5.3 | 23.1 | 7 | 8.7 | 4.806245936
|
||||
Lier | 33492 | Europe | 9.5 | 13 | 49.7 | 11.25 | 13 | 7.049822693
|
||||
Decin | 50311 | Europe | 15.6 | 21.1 | 140 | 18.35 | 21.1 | 11.83215957
|
||||
Marianske Lazne | 14083 | Europe | 9.6 | 17.9 | 51.81 | 13.75 | 17.9 | 7.197916365
|
||||
Bindlach | 7211 | Europe | 9.6 | 8 | 37.6 | 8.8 | 9.6 | 6.131883887
|
||||
Moscow | 11503501 | Russia | 32.2 | 46.8 | 2511 | | |
|
||||
Saint Petersburg | 4879566 | Russia | 18.6 | 36.1 | 1439 | | |
|
||||
Novosibirsk | 1473754 | Russia | 22.6 | 43.8 | 502.1 | | |
|
||||
Yekaterinburg | 1349772 | Russia | 24.4 | 33 | 495 | | |
|
||||
Nizhny Novgorod | 1250619 | Russia | 24.1 | 30.4 | 410.68 | | |
|
||||
Samara | 1164685 | Russia | 20.1 | 44.7 | 541.382 | | |
|
||||
Kazan | 1143535 | Russia | 34.3 | 29.3 | 425.3 | | |
|
||||
Rostov-on-Don | 1089261 | Russia | 59.2 | 26.4 | 348.5 | | |
|
||||
Volgograd | 1021215 | Russia | 12.4 | 49.2 | 859.353 | | |
|
||||
Krasnoyarsk | 1035528 | Russia | 36.2 | 23.8 | 348 | | |
|
||||
Saratov | 839755 | Russia | 25.1 | 35.2 | 394 | | |
|
||||
Krasnodar | 805680 | Russia | 26.2 | 20.7 | 192.19 | | |
|
||||
Tolyatti | 718127 | Russia | 31 | 15 | 314.78 | | |
|
||||
Izhevsk | 637309 | Russia | 32.7 | 21.3 | 315.15 | | |
|
||||
Vladivostok | 600378 | Russia | 16.4 | 39.6 | 331.16 | | |
|
||||
Orenburg | 560046 | Russia | 13.6 | 36.3 | 259 | | |
|
||||
Tula | 490508 | Russia | 18.2 | 21.8 | 145.8 | | |
|
||||
Cheboksary | 479266 | Russia | 16.7 | 13.9 | 250.87 | | |
|
||||
Tver | 411044 | Russia | 20.3 | 27.8 | 152.22 | | |
|
||||
Arkhangelsk | 350985 | Russia | 13.8 | 35.7 | 294.42 | | |
|
||||
Vologda | 306487 | Russia | 18.1 | 11.1 | 116 | | |
|
||||
Nizhnevartovsk | 265994 | Russia | 20.1 | 13.7 | 71 | | |
|
||||
158
tools/python/city_radius.py
Normal file
158
tools/python/city_radius.py
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
import sys, os, math
|
||||
import matplotlib.pyplot as plt
|
||||
from optparse import OptionParser
|
||||
|
||||
|
||||
cities = []
|
||||
|
||||
def strip(s):
|
||||
return s.strip('\t\n ')
|
||||
|
||||
def load_data(path):
|
||||
|
||||
global cities
|
||||
|
||||
f = open(path, 'r')
|
||||
lines = f.readlines()
|
||||
f.close();
|
||||
|
||||
for l in lines:
|
||||
|
||||
if l.startswith('#'):
|
||||
continue
|
||||
|
||||
data = l.split('|')
|
||||
|
||||
if len(data) < 6:
|
||||
continue
|
||||
|
||||
item = {}
|
||||
|
||||
item['name'] = strip(data[0])
|
||||
item['population'] = int(strip(data[1]))
|
||||
item['region'] = strip(data[2])
|
||||
item['width'] = float(strip(data[3]))
|
||||
item['height'] = float(strip(data[4]))
|
||||
|
||||
item['square'] = float(data[5])
|
||||
|
||||
cities.append(item)
|
||||
|
||||
# build plot
|
||||
print "Cities count: %d" % len(cities)
|
||||
|
||||
def formula(popul, base = 32, mult = 0.5):
|
||||
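# Approximates a city's linear extent in km as mult * population**(1 / base).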
#return math.exp(math.log(popul, base)) * mult
|
||||
return math.pow(popul, 1 / base) * mult
|
||||
|
||||
def avgDistance(approx, data):
|
||||
dist = 0
|
||||
for x in range(len(data)):
|
||||
dist += math.fabs(approx[x] - data[x])
|
||||
return dist / float(len(data))
|
||||
|
||||
def findBest(popul, data, minBase = 5, maxBase = 100, stepBase = 0.1, minMult = 0.01, maxMult = 1, stepMult = 0.01):
|
||||
|
||||
# try to find best parameters
|
||||
base = minBase
|
||||
|
||||
minDist = -1
|
||||
bestMult = minMult
|
||||
bestBase = base
|
||||
|
||||
while base <= maxBase:
|
||||
print "%.02f%% best mult: %f, best base: %f, best dist: %f" % (100 * (base - minBase) / (maxBase - minBase), bestMult, bestBase, minDist)
|
||||
mult = minMult
|
||||
|
||||
while mult <= maxMult:
|
||||
approx = []
|
||||
|
||||
for p in popul:
|
||||
approx.append(formula(p, base, mult))
|
||||
|
||||
dist = avgDistance(approx, data)
|
||||
|
||||
if minDist < 0 or minDist > dist:
|
||||
minDist = dist
|
||||
bestBase = base
|
||||
bestMult = mult
|
||||
|
||||
mult += stepMult
|
||||
|
||||
base += stepBase
|
||||
|
||||
return (bestBase, bestMult)
|
||||
|
||||
def process_data(steps_count, base, mult, bestFind = False, dataFlag = 0):
|
||||
avgData = []
|
||||
maxData = []
|
||||
sqrData = []
|
||||
population = []
|
||||
maxPopulation = 0
|
||||
minPopulation = -1
|
||||
for city in cities:
|
||||
p = city['population']
|
||||
w = city['width']
|
||||
h = city['height']
|
||||
s = city['square']
|
||||
population.append(p)
|
||||
if p > maxPopulation:
|
||||
maxPopulation = p
|
||||
if minPopulation < 0 or p < minPopulation:
|
||||
minPopulation = p
|
||||
|
||||
maxData.append(max([w, h]))
|
||||
avgData.append((w + h) * 0.5)
|
||||
sqrData.append(math.sqrt(s))
|
||||
|
||||
|
||||
bestBase = base
|
||||
bestMult = mult
|
||||
if bestFind:
|
||||
d = maxData
|
||||
if dataFlag == 1:
|
||||
d = avgData
|
||||
elif dataFlag == 2:
|
||||
d = sqrData
|
||||
bestBase, bestMult = findBest(population, d)
|
||||
|
||||
print "Finished\n\nBest mult: %f, Best base: %f" % (bestMult, bestBase)
|
||||
|
||||
approx = []
|
||||
population2 = []
|
||||
v = minPopulation
|
||||
step = (maxPopulation - minPopulation) / float(steps_count)
|
||||
for i in range(0, steps_count):
|
||||
approx.append(formula(v, bestBase, bestMult))
|
||||
population2.append(v)
|
||||
v += step
|
||||
|
||||
plt.plot(population, avgData, 'bo', population, maxData, 'ro', population, sqrData, 'go', population2, approx, 'y')
|
||||
plt.axis([minPopulation, maxPopulation, 0, 100])
|
||||
plt.xscale('log')
|
||||
plt.show()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
if len(sys.argv) < 3:
|
||||
print('city_radius.py <data_file> <steps>')
|
||||
|
||||
parser = OptionParser()
|
||||
parser.add_option("-f", "--file", dest="filename", default="city_popul_sqr.data",
|
||||
help="source data file", metavar="path")
|
||||
parser.add_option("-s", "--scan",
|
||||
dest="best", default=False, action="store_true",
|
||||
help="scan best values of mult and base")
|
||||
parser.add_option('-m', "--mult",
|
||||
dest='mult', default=1,
|
||||
help='multiplier value')
|
||||
parser.add_option('-b', '--base',
|
||||
dest='base', default=3.6,
|
||||
help="base value")
|
||||
parser.add_option('-d', '--data',
|
||||
default=0, dest='data',
|
||||
help="Dataset to use on best values scan: 0 - max, 1 - avg, 2 - sqr")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
load_data(options.filename)
|
||||
process_data(1000, float(options.base), float(options.mult), options.best, int(options.data))
|
||||
0
tools/python/data/__init__.py
Normal file
0
tools/python/data/__init__.py
Normal file
21
tools/python/data/all/setup.py
Executable file
21
tools/python/data/all/setup.py
Executable file
|
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
|
||||
|
||||
from data.base import get_version
|
||||
from data.base import setup
|
||||
|
||||
_V = get_version()
|
||||
|
||||
_D = [
|
||||
"omim-data-borders",
|
||||
"omim-data-essential",
|
||||
"omim-data-files",
|
||||
"omim-data-fonts",
|
||||
"omim-data-styles",
|
||||
]
|
||||
|
||||
setup(__file__, "all", [], install_requires=["{}=={}".format(d, _V) for d in _D])
|
||||
69
tools/python/data/base.py
Normal file
69
tools/python/data/base.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
import os
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
import setuptools
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
|
||||
|
||||
from pyhelpers.setup import chdir
|
||||
from pyhelpers.setup import get_version
|
||||
|
||||
|
||||
DATA_PATH = os.path.abspath(
|
||||
os.path.join(os.path.dirname(__file__), "..", "..", "..", "data")
|
||||
)
|
||||
|
||||
|
||||
def get_files_from_dir(abs_root_path, b, data_files):
|
||||
for root, dirs, files in os.walk(abs_root_path):
|
||||
data_files[b].extend(os.path.join(root, f) for f in files)
|
||||
for d in dirs:
|
||||
get_files_from_dir(
|
||||
os.path.join(abs_root_path, d), os.path.join(b, d), data_files
|
||||
)
|
||||
|
||||
|
||||
def get_data_files(relative_data_paths):
|
||||
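# Maps each requested data path to an install target under "omim-data/", preserving the sub-directory layout.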
data_files = defaultdict(lambda: [])
|
||||
for p in relative_data_paths:
|
||||
path = os.path.join(DATA_PATH, p)
|
||||
b = os.path.join("omim-data", path.replace(DATA_PATH + os.path.sep, ""))
|
||||
if os.path.isdir(path):
|
||||
get_files_from_dir(path, b, data_files)
|
||||
else:
|
||||
b = os.path.dirname(b)
|
||||
data_files[b].append(path)
|
||||
return data_files.items()
|
||||
|
||||
|
||||
def setup(
|
||||
source_file,
|
||||
suffix,
|
||||
relative_data_paths,
|
||||
packages=None,
|
||||
package_dir=None,
|
||||
install_requires=None,
|
||||
cmdclass=None,
|
||||
supported_pythons=("2", "2.7", "3", "3.5", "3.6", "3.7", "3.8", "3.9"),
|
||||
):
|
||||
with chdir(os.path.abspath(os.path.dirname(source_file))):
|
||||
setuptools.setup(
|
||||
name="omim-data-{}".format(suffix),
|
||||
version=str(get_version()),
|
||||
author="CoMaps",
|
||||
author_email="info@comaps.app",
|
||||
description="This package contains {} data files.".format(suffix),
|
||||
url="https://codeberg.org/comaps",
|
||||
packages=[] if packages is None else packages,
|
||||
package_dir={} if package_dir is None else package_dir,
|
||||
cmdclass={} if cmdclass is None else cmdclass,
|
||||
classifiers=["License :: OSI Approved :: Apache Software License",]
|
||||
+ [
|
||||
"Programming Language :: Python :: {}".format(supported_python)
|
||||
for supported_python in supported_pythons
|
||||
],
|
||||
install_requires=install_requires or [],
|
||||
data_files=get_data_files(relative_data_paths),
|
||||
)
|
||||
44
tools/python/data/borders/__init__.py
Normal file
44
tools/python/data/borders/__init__.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import logging
|
||||
import os
|
||||
import tarfile
|
||||
|
||||
from six import BytesIO
|
||||
|
||||
from data_files import find_data_files
|
||||
|
||||
try:
|
||||
import lzma
|
||||
except ImportError:
|
||||
from backports import lzma
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def init(borders_path=None):
|
||||
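# Locates the installed omim-data files and, if the borders directory is missing, unpacks borders.tar.xz into it.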
data_path = find_data_files("omim-data")
|
||||
|
||||
if data_path is None:
|
||||
logger.error("omim-data was not found.")
|
||||
return False
|
||||
|
||||
if borders_path is None:
|
||||
borders_path = os.path.join(data_path, "borders")
|
||||
|
||||
if not os.path.exists(borders_path):
|
||||
tar_lzma_path = os.path.join(data_path, "borders.tar.xz")
|
||||
lzma_stream = BytesIO()
|
||||
with open(tar_lzma_path, mode="rb") as f:
|
||||
decompressed = lzma.decompress(f.read())
|
||||
lzma_stream.write(decompressed)
|
||||
|
||||
lzma_stream.seek(0)
|
||||
try:
|
||||
with tarfile.open(fileobj=lzma_stream, mode="r") as tar:
|
||||
tar.extractall(borders_path)
|
||||
except PermissionError as e:
|
||||
logger.error(str(e))
|
||||
return False
|
||||
|
||||
logger.info("{} was created.".format(borders_path))
|
||||
|
||||
return True
|
||||
63
tools/python/data/borders/setup.py
Executable file
63
tools/python/data/borders/setup.py
Executable file
|
|
@ -0,0 +1,63 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
from distutils import log
|
||||
from distutils.command.build import build
|
||||
from distutils.command.clean import clean
|
||||
|
||||
from six import BytesIO
|
||||
|
||||
try:
|
||||
import lzma
|
||||
except ImportError:
|
||||
from backports import lzma
|
||||
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
|
||||
|
||||
from data.base import DATA_PATH
|
||||
from data.base import chdir
|
||||
from data.base import get_version
|
||||
from data.base import setup
|
||||
|
||||
|
||||
TAR_LZMA_PATH = os.path.join(DATA_PATH, "borders.tar.xz")
|
||||
|
||||
|
||||
class BuildCmd(build, object):
|
||||
def run(self):
|
||||
log.info("Creating {}".format(TAR_LZMA_PATH))
|
||||
tar_stream = BytesIO()
|
||||
borders_path = os.path.join(DATA_PATH, "borders")
|
||||
with chdir(borders_path):
|
||||
with tarfile.open(fileobj=tar_stream, mode="w") as tar:
|
||||
for f in os.listdir(borders_path):
|
||||
tar.add(f)
|
||||
|
||||
tar_stream.seek(0)
|
||||
with lzma.open(TAR_LZMA_PATH, mode="w") as f:
|
||||
f.write(tar_stream.read())
|
||||
|
||||
super(BuildCmd, self).run()
|
||||
|
||||
|
||||
class CleanCmd(clean, object):
|
||||
def run(self):
|
||||
if os.path.exists(TAR_LZMA_PATH):
|
||||
log.info("Removing {}".format(TAR_LZMA_PATH))
|
||||
os.remove(TAR_LZMA_PATH)
|
||||
|
||||
super(CleanCmd, self).run()
|
||||
|
||||
|
||||
setup(
|
||||
__file__,
|
||||
"borders",
|
||||
["borders.tar.xz", "packed_polygons.bin"],
|
||||
package_dir={"borders": ""},
|
||||
packages=["borders"],
|
||||
cmdclass={"build": BuildCmd, "clean": CleanCmd},
|
||||
install_requires=["omim-data-files=={}".format(get_version())]
|
||||
)
|
||||
44
tools/python/data/essential/setup.py
Executable file
|
|
@ -0,0 +1,44 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
|
||||
|
||||
from data.base import get_version
|
||||
from data.base import setup
|
||||
|
||||
setup(
|
||||
__file__,
|
||||
"essential",
|
||||
[
|
||||
"borders_vs_osm.csv",
|
||||
"categories_brands.txt",
|
||||
"categories_cuisines.txt",
|
||||
"categories.txt",
|
||||
"classificator.txt",
|
||||
"colors.txt",
|
||||
"countries_meta.txt",
|
||||
"countries_synonyms.csv",
|
||||
"countries.txt",
|
||||
"external_resources.txt",
|
||||
"fonts/blacklist.txt",
|
||||
"fonts/unicode_blocks.txt",
|
||||
"fonts/whitelist.txt",
|
||||
"hierarchy.txt",
|
||||
"mapcss-dynamic.txt",
|
||||
"mapcss-mapping.csv",
|
||||
"mixed_nodes.txt",
|
||||
"mixed_tags.txt",
|
||||
"old_vs_new.csv",
|
||||
"patterns.txt",
|
||||
"replaced_tags.txt",
|
||||
"skipped_elements.json",
|
||||
"synonyms.txt",
|
||||
"transit_colors.txt",
|
||||
"types.txt",
|
||||
"ugc_types.csv",
|
||||
"visibility.txt",
|
||||
],
|
||||
install_requires=["omim-data-files=={}".format(get_version())]
|
||||
)
|
||||
26
tools/python/data/fonts/setup.py
Executable file
|
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
|
||||
|
||||
from data.base import get_version
|
||||
from data.base import setup
|
||||
|
||||
setup(
|
||||
__file__,
|
||||
"fonts",
|
||||
[
|
||||
"00_NotoNaskhArabic-Regular.ttf",
|
||||
"00_NotoSansThai-Regular.ttf",
|
||||
"01_dejavusans.ttf",
|
||||
"02_droidsans-fallback.ttf",
|
||||
"03_jomolhari-id-a3d.ttf",
|
||||
"04_padauk.ttf",
|
||||
"05_khmeros.ttf",
|
||||
"06_code2000.ttf",
|
||||
"07_roboto_medium.ttf",
|
||||
],
|
||||
install_requires=["omim-data-files=={}".format(get_version())]
|
||||
)
|
||||
26
tools/python/data/styles/setup.py
Executable file
|
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", ".."))
|
||||
|
||||
from data.base import get_version
|
||||
from data.base import setup
|
||||
|
||||
setup(
|
||||
__file__,
|
||||
"styles",
|
||||
[
|
||||
"drules_proto.bin",
|
||||
"drules_proto_default_light.bin",
|
||||
"drules_proto_default_light.txt",
|
||||
"drules_proto_default_dark.bin",
|
||||
"drules_proto_default_dark.txt",
|
||||
"drules_proto_vehicle_light.bin",
|
||||
"drules_proto_vehicle_light.txt",
|
||||
"drules_proto_vehicle_dark.bin",
|
||||
"drules_proto_vehicle_dark.txt",
|
||||
],
|
||||
install_requires=["omim-data-files=={}".format(get_version())]
|
||||
)
|
||||
43
tools/python/data_files/__init__.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import os
|
||||
import site
|
||||
import sys
|
||||
|
||||
|
||||
def find_data_files_in_user_installations(directory):
|
||||
possible_paths = [os.path.join(site.USER_BASE, directory),] + [
|
||||
os.path.normpath(os.path.join(p, "../../..", directory))
|
||||
for p in [site.getusersitepackages()]
|
||||
]
|
||||
|
||||
for p in possible_paths:
|
||||
if os.path.isdir(p):
|
||||
return p
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def find_data_files_in_sys_installations(directory):
|
||||
possible_paths = [os.path.join(sys.prefix, directory),] + [
|
||||
os.path.normpath(os.path.join(p, "../../..", directory))
|
||||
for p in site.getsitepackages()
|
||||
]
|
||||
for p in possible_paths:
|
||||
if os.path.isdir(p):
|
||||
return p
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def find_data_files(directory, user_inst_first=True):
|
||||
functions = [
|
||||
(int(user_inst_first), find_data_files_in_user_installations),
|
||||
(int(not user_inst_first), find_data_files_in_sys_installations),
|
||||
]
|
||||
|
||||
functions.sort(key=lambda k: k[0])
|
||||
for prior, func in functions:
|
||||
res = func(directory)
|
||||
if res is not None:
|
||||
return res
|
||||
|
||||
return None
|
||||
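A short usage sketch for the helpers above (assuming one of the `omim-data-*` packages is installed so that an `omim-data` directory exists in a user or system installation):

```python
# Hedged example: locate the installed omim-data directory and a file inside it.
import os

from data_files import find_data_files

data_path = find_data_files("omim-data")  # user installations are searched first
if data_path is None:
    print("omim-data is not installed.")
else:
    print("Found data at", data_path)
    print("countries.txt present:",
          os.path.exists(os.path.join(data_path, "countries.txt")))
```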
30
tools/python/data_files/setup.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
|
||||
import setuptools
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
|
||||
|
||||
from pyhelpers.setup import chdir
|
||||
from pyhelpers.setup import get_version
|
||||
|
||||
|
||||
with chdir(os.path.abspath(os.path.dirname(__file__))):
|
||||
supported_pythons = ("2", "2.7", "3", "3.5", "3.6", "3.7")
|
||||
setuptools.setup(
|
||||
name="omim-data-files",
|
||||
version=str(get_version()),
|
||||
author="CoMaps",
|
||||
author_email="info@comaps.app",
|
||||
description="This package is a library for dealing with data files.",
|
||||
url="https://codeberg.org/comaps",
|
||||
package_dir={"data_files": ""},
|
||||
packages=["data_files",],
|
||||
classifiers=["License :: OSI Approved :: Apache Software License",]
|
||||
+ [
|
||||
"Programming Language :: Python :: {}".format(supported_python)
|
||||
for supported_python in supported_pythons
|
||||
],
|
||||
)
|
||||
0
tools/python/descriptions/__init__.py
Normal file
63
tools/python/descriptions/__main__.py
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
import argparse
|
||||
import itertools
|
||||
import logging
|
||||
import os
|
||||
|
||||
import wikipediaapi
|
||||
|
||||
from descriptions.descriptions_downloader import check_and_get_checker
|
||||
from descriptions.descriptions_downloader import download_from_wikidata_tags
|
||||
from descriptions.descriptions_downloader import download_from_wikipedia_tags
|
||||
from descriptions.descriptions_downloader import log
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description="Download wiki pages.", usage="python3 -m descriptions "
|
||||
"--output_dir ~/maps_build/descriptions "
|
||||
"--wikipedia ~/maps_build/wiki_urls.txt "
|
||||
"--wikidata ~/maps_build/id_to_wikidata.csv "
|
||||
"--langs en de fr es ru tr"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output_dir", metavar="PATH", type=str, help="Output dir for saving pages."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--popularity", metavar="PATH", type=str,
|
||||
help="File with popular object ids with wikipedia data to download. If not given, download all objects.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--wikipedia", metavar="PATH", type=str, required=True, help="Input file with wikipedia url.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--wikidata", metavar="PATH", type=str, help="Input file with wikidata ids."
|
||||
)
|
||||
parser.add_argument("--langs", metavar="LANGS", type=str, nargs="+", action="append",
|
||||
help="Languages for pages. If left blank, pages in all available languages will be loaded.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
log.setLevel(logging.WARNING)
|
||||
wikipediaapi.log.setLevel(logging.DEBUG)
|
||||
|
||||
args = parse_args()
|
||||
wikipedia_file = args.wikipedia
|
||||
wikidata_file = args.wikidata
|
||||
output_dir = args.output_dir
|
||||
popularity_file = args.popularity
|
||||
langs = list(itertools.chain.from_iterable(args.langs))
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
checker = check_and_get_checker(popularity_file)
|
||||
download_from_wikipedia_tags(wikipedia_file, output_dir, langs, checker)
|
||||
|
||||
if wikidata_file is None:
|
||||
log.warning(f"Wikidata file not set.")
|
||||
elif os.path.exists(wikidata_file):
|
||||
download_from_wikidata_tags(wikidata_file, output_dir, langs, checker)
|
||||
else:
|
||||
log.warning(f"Wikidata ({wikidata_file}) file not found.")
|
||||
|
||||
|
||||
main()
|
||||
318
tools/python/descriptions/descriptions_downloader.py
Normal file
|
|
@ -0,0 +1,318 @@
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
import types
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import http.client
|
||||
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import htmlmin
|
||||
import requests
|
||||
import wikipediaapi
|
||||
from bs4 import BeautifulSoup
|
||||
from wikidata.client import Client
|
||||
|
||||
from descriptions.exceptions import GettingError, ParseError
|
||||
|
||||
"""
|
||||
This script downloads Wikipedia pages for different languages.
|
||||
"""
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
WORKERS = 80
|
||||
REQUEST_ATTEMPTS = 8
|
||||
ATTEMPTS_PAUSE_SECONDS = 4.0
|
||||
|
||||
HEADERS = {f"h{x}" for x in range(1, 7)}
|
||||
BAD_SECTIONS = {
|
||||
"en": [
|
||||
"External links",
|
||||
"Sources",
|
||||
"See also",
|
||||
"Bibliography",
|
||||
"Further reading",
|
||||
"References",
|
||||
],
|
||||
"de": [
|
||||
"Einzelnachweise",
|
||||
"Weblinks",
|
||||
"Literatur",
|
||||
"Siehe auch",
|
||||
"Anmerkungen",
|
||||
"Anmerkungen und Einzelnachweise",
|
||||
"Filme",
|
||||
"Einzelbelege",
|
||||
],
|
||||
"fr": [
|
||||
"Bibliographie",
|
||||
"Lien externe",
|
||||
"Voir aussi",
|
||||
"Liens externes",
|
||||
"Références",
|
||||
"Notes et références",
|
||||
"Articles connexes",
|
||||
],
|
||||
"es": ["Vínculos de interés", "Véase también", "Enlaces externos", "Referencias"],
|
||||
"ru": ["Литература", "Ссылки", "См. также", "Библиография", "Примечания"],
|
||||
"tr": ["Kaynakça", "Ayrıca bakınız", "Dış bağlantılar", "Notlar", "Dipnot"],
|
||||
}
|
||||
|
||||
|
||||
def try_get(obj, prop, *args, **kwargs):
|
||||
attempts = REQUEST_ATTEMPTS
|
||||
while attempts != 0:
|
||||
try:
|
||||
attr = getattr(obj, prop)
|
||||
is_method = isinstance(attr, types.MethodType)
|
||||
return attr(*args, **kwargs) if is_method else attr
|
||||
except (
|
||||
requests.exceptions.ConnectionError,
|
||||
requests.exceptions.ReadTimeout,
|
||||
json.decoder.JSONDecodeError,
|
||||
http.client.HTTPException,
|
||||
) as e:
|
||||
log.debug(e)
|
||||
except urllib.error.HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise GettingError(f"Page not found {e.msg}")
|
||||
except KeyError:
|
||||
raise GettingError(f"Getting {prop} field failed. {prop} not found.")
|
||||
except urllib.error.URLError:
|
||||
raise GettingError(f"URLError: {obj}, {prop}, {args}, {kwargs}")
|
||||
|
||||
time.sleep(random.uniform(0.0, ATTEMPTS_PAUSE_SECONDS))
|
||||
attempts -= 1
|
||||
|
||||
raise GettingError(f"Getting {prop} field failed")
|
||||
|
||||
|
||||
def read_popularity(path):
|
||||
"""
|
||||
:param path: a path of popularity file. A file contains '<id>,<rank>' rows.
|
||||
:return: a set of popularity object ids
|
||||
"""
|
||||
ids = set()
|
||||
for line in open(path):
|
||||
try:
|
||||
ident = int(line.split(",", maxsplit=1)[0])
|
||||
except (AttributeError, IndexError):
|
||||
continue
|
||||
ids.add(ident)
|
||||
return ids
|
||||
|
||||
|
||||
def should_download_page(popularity_set):
|
||||
def wrapped(ident):
|
||||
return popularity_set is None or ident in popularity_set
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def remove_bad_sections(soup, lang):
|
||||
if lang not in BAD_SECTIONS:
|
||||
return soup
|
||||
it = iter(soup.find_all())
|
||||
current = next(it, None)
|
||||
current_header_level = None
|
||||
while current is not None:
|
||||
if current.name in HEADERS and current.text.strip() in BAD_SECTIONS[lang]:
|
||||
current_header_level = current.name
|
||||
current.extract()
|
||||
current = next(it, None)
|
||||
while current is not None:
|
||||
if current.name == current_header_level:
|
||||
break
|
||||
current.extract()
|
||||
current = next(it, None)
|
||||
else:
|
||||
current = next(it, None)
|
||||
return soup
|
||||
|
||||
|
||||
def beautify_page(html, lang):
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
for x in soup.find_all():
|
||||
if len(x.text.strip()) == 0:
|
||||
x.extract()
|
||||
soup = remove_bad_sections(soup, lang)
|
||||
html = str(soup.prettify())
|
||||
html = htmlmin.minify(html, remove_empty_space=True)
|
||||
return html
|
||||
|
||||
|
||||
def need_lang(lang, langs):
|
||||
return lang in langs if langs else True
|
||||
|
||||
|
||||
def get_page_info(url):
|
||||
url = urllib.parse.unquote(url)
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
try:
|
||||
lang = parsed.netloc.split(".", maxsplit=1)[0]
|
||||
except (AttributeError, IndexError):
|
||||
raise ParseError(f"{parsed.netloc} is incorrect.")
|
||||
try:
|
||||
page_name = parsed.path.rsplit("/", maxsplit=1)[-1]
|
||||
except (AttributeError, IndexError):
|
||||
raise ParseError(f"{parsed.path} is incorrect.")
|
||||
return lang, page_name
|
||||
|
||||
|
||||
def get_wiki_page(lang, page_name):
|
||||
wiki = wikipediaapi.Wikipedia(
|
||||
language=lang, extract_format=wikipediaapi.ExtractFormat.HTML
|
||||
)
|
||||
return wiki.page(page_name)
|
||||
|
||||
|
||||
def download(directory, url):
|
||||
try:
|
||||
lang, page_name = get_page_info(url)
|
||||
except ParseError:
|
||||
log.exception(f"Parsing failed. {url} is incorrect.")
|
||||
return None
|
||||
|
||||
path = os.path.join(directory, f"{lang}.html")
|
||||
if os.path.exists(path):
|
||||
log.debug(f"{path} already exists.")
|
||||
return None
|
||||
|
||||
page = get_wiki_page(lang, page_name)
|
||||
try:
|
||||
text = try_get(page, "text")
|
||||
except GettingError as e:
|
||||
log.exception(f"Error: page {page_name} is not downloaded for lang {lang} and url {url} ({e}).")
|
||||
return None
|
||||
|
||||
page_size = len(text)
|
||||
if page_size > 0:
|
||||
os.makedirs(directory, exist_ok=True)
|
||||
text = beautify_page(text, lang)
|
||||
log.info(f"Save to {path} {lang} {page_name} {page_size}.")
|
||||
with open(path, "w") as file:
|
||||
file.write(text)
|
||||
else:
|
||||
log.warning(f"Page {url} is empty. It has not been saved.")
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def get_wiki_langs(url):
|
||||
lang, page_name = get_page_info(url)
|
||||
page = get_wiki_page(lang, page_name)
|
||||
|
||||
curr_lang = [(lang, url)]
|
||||
try:
|
||||
langlinks = try_get(page, "langlinks")
|
||||
return (
|
||||
list(zip(langlinks.keys(), [link.fullurl for link in langlinks.values()]))
|
||||
+ curr_lang
|
||||
)
|
||||
except GettingError as e:
|
||||
log.exception(f"Error: no languages for page {page_name} with url {url} ({e}).")
|
||||
return curr_lang
|
||||
|
||||
|
||||
def download_all_from_wikipedia(path, url, langs):
|
||||
try:
|
||||
available_langs = get_wiki_langs(url)
|
||||
except ParseError:
|
||||
log.exception("Parsing failed. {url} is incorrect.")
|
||||
return
|
||||
available_langs = filter(lambda x: need_lang(x[0], langs), available_langs)
|
||||
for lang in available_langs:
|
||||
download(path, lang[1])
|
||||
|
||||
|
||||
def wikipedia_worker(output_dir, checker, langs):
|
||||
def wrapped(line):
|
||||
if not line.strip():
|
||||
return
|
||||
try:
|
||||
# First param is mwm_path, which added this line entry.
|
||||
_, ident, url = line.split("\t")
|
||||
ident = int(ident)
|
||||
if not checker(ident):
|
||||
return
|
||||
url = url.strip()
|
||||
except (AttributeError, ValueError):
|
||||
log.exception(f"{line} is incorrect.")
|
||||
return
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
path = os.path.join(output_dir, parsed.netloc, parsed.path[1:])
|
||||
download_all_from_wikipedia(path, url, langs)
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def download_from_wikipedia_tags(input_file, output_dir, langs, checker):
|
||||
with open(input_file) as file:
|
||||
_ = file.readline() # skip header
|
||||
with ThreadPoolExecutor(WORKERS) as pool:
|
||||
pool.map(wikipedia_worker(output_dir, checker, langs), file)
|
||||
|
||||
|
||||
def get_wikidata_urls(entity, langs):
|
||||
try:
|
||||
keys = entity.data["sitelinks"].keys()
|
||||
except (KeyError, AttributeError):
|
||||
log.exception(f"Sitelinks not found for {entity.id}.")
|
||||
return None
|
||||
return [
|
||||
entity.data["sitelinks"][k]["url"]
|
||||
for k in keys
|
||||
if any([k.startswith(lang) for lang in langs])
|
||||
]
|
||||
|
||||
|
||||
def wikidata_worker(output_dir, checker, langs):
|
||||
def wrapped(line):
|
||||
if not line.strip():
|
||||
return
|
||||
try:
|
||||
ident, wikidata_id = line.split("\t")
|
||||
ident = int(ident)
|
||||
wikidata_id = wikidata_id.strip()
|
||||
if not checker(ident):
|
||||
return
|
||||
except (AttributeError, ValueError):
|
||||
log.exception(f"{line} is incorrect.")
|
||||
return
|
||||
client = Client()
|
||||
try:
|
||||
entity = try_get(client, "get", wikidata_id, load=True)
|
||||
except GettingError:
|
||||
log.exception(f"Error: page is not downloaded {wikidata_id}.")
|
||||
return
|
||||
urls = get_wikidata_urls(entity, langs)
|
||||
if not urls:
|
||||
return
|
||||
path = os.path.join(output_dir, wikidata_id)
|
||||
for url in urls:
|
||||
download(path, url)
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def download_from_wikidata_tags(input_file, output_dir, langs, checker):
|
||||
wikidata_output_dir = os.path.join(output_dir, "wikidata")
|
||||
os.makedirs(wikidata_output_dir, exist_ok=True)
|
||||
with open(input_file) as file:
|
||||
with ThreadPoolExecutor(WORKERS) as pool:
|
||||
pool.map(wikidata_worker(wikidata_output_dir, checker, langs), file)
|
||||
|
||||
|
||||
def check_and_get_checker(popularity_file):
|
||||
popularity_set = None
|
||||
if popularity_file is None:
|
||||
log.warning(f"Popularity file not set.")
|
||||
elif os.path.exists(popularity_file):
|
||||
popularity_set = read_popularity(popularity_file)
|
||||
log.info(f"Popularity set size: {len(popularity_set)}.")
|
||||
else:
|
||||
log.error(f"Popularity file ({popularity_file}) not found.")
|
||||
return should_download_page(popularity_set)
|
||||
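For reference, a sketch of the input line formats that `download_from_wikipedia_tags` and `download_from_wikidata_tags` above expect (field layout inferred from the worker functions; the values are made up):

```python
# Hedged examples of input records, tab-separated, one per line.

# Wikipedia URLs file: <mwm_path>\t<feature id>\t<wikipedia url>.
# The first line of this file is treated as a header and skipped.
wikipedia_line = "Japan_Kinki Region_Osaka_Osaka\t123456\thttps://en.wikipedia.org/wiki/Osaka\n"

# Wikidata ids file: <feature id>\t<wikidata id>; no header line is skipped here.
wikidata_line = "123456\tQ35765\n"

# The popularity file (see read_popularity above) contains "<id>,<rank>" rows;
# when --popularity is given, only ids present there are downloaded.
popularity_line = "123456,1\n"
```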
10
tools/python/descriptions/exceptions.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
class DescriptionError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ParseError(DescriptionError):
|
||||
pass
|
||||
|
||||
|
||||
class GettingError(DescriptionError):
|
||||
pass
|
||||
5
tools/python/descriptions/requirements.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
htmlmin2==0.1.13
|
||||
requests>=2.31.0
|
||||
beautifulsoup4==4.9.1
|
||||
wikidata==0.6.1
|
||||
wikipedia-api==0.5.4
|
||||
5
tools/python/descriptions/requirements_dev.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
htmlmin2==0.1.13
|
||||
requests>=2.31.0
|
||||
beautifulsoup4==4.9.1
|
||||
wikidata==0.6.1
|
||||
wikipedia-api==0.5.4
|
||||
32
tools/python/descriptions/setup.py
Executable file
|
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
|
||||
import setuptools
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
|
||||
|
||||
from pyhelpers.setup import chdir
|
||||
from pyhelpers.setup import get_version
|
||||
from pyhelpers.setup import get_requirements
|
||||
|
||||
|
||||
with chdir(os.path.abspath(os.path.dirname(__file__))):
|
||||
setuptools.setup(
|
||||
name="omim-descriptions",
|
||||
version=str(get_version()),
|
||||
author="CoMaps",
|
||||
author_email="info@comaps.app",
|
||||
description="This package is a library that provides descriptions "
|
||||
"(such as those from Wikipedia) to geographic objects.",
|
||||
url="https://codeberg.org/comaps",
|
||||
package_dir={"descriptions": ""},
|
||||
packages=["descriptions"],
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
],
|
||||
python_requires=">=3.6",
|
||||
install_requires=get_requirements(),
|
||||
)
|
||||
43
tools/python/generate_styles_override.py
Executable file
|
|
@ -0,0 +1,43 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import os
|
||||
import shutil
|
||||
|
||||
|
||||
def copy_style_file(style_path, drules_suffix, target_path):
|
||||
if not os.path.exists(style_path):
|
||||
print('Path {0} is not found'.format(style_path))
|
||||
return
|
||||
|
||||
drules_proto_path = os.path.join(style_path, 'drules_proto_design.bin')
|
||||
if not os.path.exists(drules_proto_path):
|
||||
print('Path {0} is not found'.format(drules_proto_path))
|
||||
return
|
||||
shutil.copyfile(drules_proto_path, os.path.join(target_path, 'drules_proto' + drules_suffix + '.bin'))
|
||||
|
||||
for density in ['6plus', 'hdpi', 'mdpi', 'xhdpi', 'xxhdpi', 'xxxhdpi']:
|
||||
res_path = os.path.join(style_path, 'resources-' + density + "_design")
|
||||
if os.path.exists(res_path):
|
||||
shutil.copytree(res_path, os.path.join(target_path, 'resources-' + density + drules_suffix))
|
||||
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print('Usage: {0} <path_to_omim/data/styles> [<target_path>]'.format(sys.argv[0]))
|
||||
sys.exit()
|
||||
|
||||
path_to_styles = sys.argv[1]
|
||||
if not os.path.isdir(path_to_styles):
|
||||
print('Invalid path to styles folder')
|
||||
sys.exit()
|
||||
|
||||
output_name = os.path.join('' if len(sys.argv) < 3 else sys.argv[2], 'styles')
|
||||
if os.path.exists(output_name):
|
||||
shutil.rmtree(output_name)
|
||||
os.makedirs(output_name)
|
||||
|
||||
paths = ['default/light', 'default/dark', 'vehicle/light', 'vehicle/dark']
|
||||
suffixes = ['_default_light', '_default_dark', '_vehicle_light', '_vehicle_dark']
|
||||
|
||||
for i in range(0, len(paths)):
|
||||
copy_style_file(os.path.join(path_to_styles, paths[i], 'out'), suffixes[i], output_name)
|
||||
108
tools/python/generate_taginfo.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
import re
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
SCRIPT_DIR: Path = Path(__file__).parent.resolve()
|
||||
MAPCSS_FILE: Path = SCRIPT_DIR / "../../data/styles/default/include/Icons.mapcss"
|
||||
TAGINFO_FILE: Path = SCRIPT_DIR / "../../data/taginfo.json"
|
||||
|
||||
BASE_ICON_URL: str = "https://codeberg.org/comaps/comaps/raw/branch/main/data/styles/default/light/symbols/"
|
||||
|
||||
PROJECT_INFO: dict[str, str] = {
|
||||
"name": "CoMaps",
|
||||
"description": "CoMaps is a community-focused privacy navigation iOS & Android app for travelers - drivers, hikers, and cyclists.",
|
||||
"project_url": "https://comaps.app",
|
||||
"doc_url": "https://codeberg.org/comaps/comaps/",
|
||||
"icon_url": "https://codeberg.org/comaps/comaps/media/branch/main/docs/badges/logo.svg",
|
||||
"contact_name": "CoMaps",
|
||||
"contact_email": "hello@comaps.app"
|
||||
}
|
||||
|
||||
def parse_mapcss(text: str) -> list[dict[str, any]]:
|
||||
tags: dict[tuple[str, str | None, str], dict[str, any]] = {}
|
||||
|
||||
# Split blocks into: selector { props }
|
||||
blocks: list[tuple[str, str]] = re.findall(r"([^\{]+)\{([^\}]*)\}", text, re.MULTILINE)
|
||||
|
||||
for selector, props in blocks:
|
||||
# Extract icon filename from props
|
||||
icon_re: re.Pattern = re.compile(r"icon-image:\s*([^;]+);")
|
||||
icon_match: re.Match | None = icon_re.search(props)
|
||||
icon_url: str | None = None
|
||||
if icon_match:
|
||||
icon_file: str = icon_match.group(1).strip()
|
||||
if icon_file and icon_file.lower() not in ["none", "zero-icon.svg"]:
|
||||
icon_url = BASE_ICON_URL + icon_file
|
||||
|
||||
# Split the selector into lines
|
||||
lines: list[str] = [line.strip() for line in selector.split("\n") if line.strip()]
|
||||
for line in lines:
|
||||
# Find anything inside square brackets
|
||||
square_brackets_re: re.Pattern = re.compile(r"\[(.*?)\]")
|
||||
square_brackets: list[str] = square_brackets_re.findall(line)
|
||||
if not square_brackets:
|
||||
continue
|
||||
|
||||
# Find key=value pairs
|
||||
pairs: list[tuple[str, str | None]] = []
|
||||
for sqb in square_brackets:
|
||||
key, sep, value = sqb.partition("=")
|
||||
key = key.strip()
|
||||
if key.startswith("!"):
|
||||
continue # skip negated keys
|
||||
value = value.strip() if sep else None
|
||||
pairs.append((key, value))
|
||||
|
||||
# Hardcode: convert value "not" to "no"
|
||||
pairs = [(k, "no" if v == "not" else v) for k, v in pairs]
|
||||
|
||||
if not pairs:
|
||||
continue # skip if no valid pairs
|
||||
|
||||
# Build shared description from all pairs
|
||||
desc: str = " + ".join(f"{k}={v if v is not None else '*'}" for k, v in pairs)
|
||||
|
||||
# Emit a tag per pair
|
||||
for key, value in pairs:
|
||||
tag_id: tuple[str, str | None, str] = (key, value, desc)
|
||||
if tag_id not in tags:
|
||||
tag: dict[str, any] = {
|
||||
"description": desc,
|
||||
"key": key,
|
||||
}
|
||||
if value is not None:
|
||||
tag["value"] = value
|
||||
if icon_url:
|
||||
tag["icon_url"] = icon_url
|
||||
tags[tag_id] = tag
|
||||
else:
|
||||
if icon_url:
|
||||
tags[tag_id]["icon_url"] = icon_url
|
||||
|
||||
# Sort by description, then key, then value
|
||||
return sorted(tags.values(), key=lambda x: (x["description"], x["key"], x.get("value", "")))
|
||||
|
||||
|
||||
def main() -> None:
|
||||
with open(MAPCSS_FILE, "r", encoding="utf-8") as f:
|
||||
mapcss: str = f.read()
|
||||
|
||||
tags: list[dict[str, any]] = parse_mapcss(mapcss)
|
||||
|
||||
data: dict[str, any] = {
|
||||
"data_format": 1,
|
||||
"data_url": "https://codeberg.org/comaps/comaps/raw/branch/main/data/taginfo.json",
|
||||
"data_updated": datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ"),
|
||||
"project": PROJECT_INFO,
|
||||
"tags": tags
|
||||
}
|
||||
|
||||
with open(TAGINFO_FILE, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=4, ensure_ascii=False)
|
||||
|
||||
print(f"✅ JSON saved to {TAGINFO_FILE}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
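A minimal sketch of what `parse_mapcss` above produces for a tiny MapCSS snippet (assuming the script is importable from `tools/python`, e.g. as `generate_taginfo`):

```python
# Hedged example: feed a small MapCSS block to parse_mapcss() and inspect the result.
from generate_taginfo import BASE_ICON_URL, parse_mapcss

sample = """
node|z17-[amenity=cafe]
{
    icon-image: cafe.svg;
}
"""

tags = parse_mapcss(sample)
# Expected: a single entry along the lines of
# {"description": "amenity=cafe", "key": "amenity", "value": "cafe",
#  "icon_url": BASE_ICON_URL + "cafe.svg"}
print(tags)
```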
286
tools/python/google_maps_bookmarks.py
Executable file
|
|
@ -0,0 +1,286 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import csv
|
||||
import json
|
||||
import argparse
|
||||
import mimetypes
|
||||
import traceback
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
from os import path, access, R_OK, linesep
|
||||
from io import StringIO
|
||||
from datetime import datetime
|
||||
|
||||
class GoogleMapsConverter:
|
||||
def __init__(self, input_file=None, output_format=None, bookmark_list_name=None, api_key=None):
|
||||
print("Follow these steps to export your saved places from Google Maps and convert them to a GPX or KML File")
|
||||
print()
|
||||
print("1. Create an API key for Google Places API following this guide")
|
||||
print(" https://developers.google.com/maps/documentation/places/web-service/get-api-key")
|
||||
print("2. Go to https://takeout.google.com/ and sign in with your Google account")
|
||||
print("3. Select 'Saved' and 'Maps (My Places)' and create an export")
|
||||
print("4. Download and unzip the export")
|
||||
print ("5a. Look for CSV files (e.g. for lists) in the folder Takeout/Saved")
|
||||
print ("5b. Look for GeoJSON files (e.g. for Saved Places) in the folder Takeout/Maps")
|
||||
print()
|
||||
|
||||
if input_file is None:
|
||||
self.get_input_file()
|
||||
else:
|
||||
self.input_file = input_file
|
||||
if not path.isfile(self.input_file):
|
||||
raise FileNotFoundError(f"Couldn't find {self.input_file}")
|
||||
if not access(self.input_file, R_OK):
|
||||
raise PermissionError(f"Couldn't read {self.input_file}")
|
||||
|
||||
if output_format is None:
|
||||
self.get_output_format()
|
||||
else:
|
||||
self.output_format = output_format
|
||||
|
||||
if bookmark_list_name is None:
|
||||
self.get_bookmark_list_name()
|
||||
else:
|
||||
self.bookmark_list_name = bookmark_list_name
|
||||
self.output_file = self.bookmark_list_name + "." + self.output_format
|
||||
|
||||
if api_key is None:
|
||||
self.get_api_key()
|
||||
else:
|
||||
self.api_key = api_key
|
||||
|
||||
self.places = []
|
||||
|
||||
def get_input_file(self):
|
||||
while True:
|
||||
self.input_file = input("Path to the file: ")
|
||||
if not path.isfile(self.input_file):
|
||||
print(f"Couldn't find {self.input_file}")
|
||||
continue
|
||||
if not access(self.input_file, R_OK):
|
||||
print(f"Couldn't read {self.input_file}")
|
||||
continue
|
||||
break
|
||||
|
||||
def get_output_format(self):
|
||||
while True:
|
||||
self.output_format = input("Output format (kml or gpx): ").lower()
|
||||
if self.output_format not in ['kml', 'gpx']:
|
||||
print("Please provide a valid output format" + linesep)
|
||||
continue
|
||||
else:
|
||||
break
|
||||
|
||||
def get_bookmark_list_name(self):
|
||||
while True:
|
||||
self.bookmark_list_name = input("Bookmark list name: ")
|
||||
if not self.bookmark_list_name:
|
||||
print("Please provide a name" + linesep)
|
||||
continue
|
||||
else:
|
||||
self.output_file = self.bookmark_list_name + "." + self.output_format
|
||||
break
|
||||
|
||||
def get_api_key(self):
|
||||
while True:
|
||||
if getattr(self, "api_key", None):
|
||||
break
|
||||
self.api_key = input("API key: ")
|
||||
if not self.api_key:
|
||||
print("Please provide an API key" + linesep)
|
||||
continue
|
||||
else:
|
||||
break
|
||||
|
||||
def convert_timestamp(self, timestamp):
|
||||
if timestamp.endswith('Z'):
|
||||
timestamp = timestamp[:-1]
|
||||
date = datetime.fromisoformat(timestamp)
|
||||
return date.strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
||||
def get_json(self, url):
|
||||
max_attempts = 3
|
||||
for retry in range(max_attempts):
|
||||
try:
|
||||
response = urllib.request.urlopen(url)
|
||||
return json.load(response)
|
||||
except urllib.error.URLError:
|
||||
print(f"Couldn't connect to Google Maps. Retrying... ({retry + 1}/{max_attempts})")
|
||||
if retry < max_attempts - 1:
|
||||
continue
|
||||
else:
|
||||
raise
|
||||
|
||||
def get_name_and_coordinates_from_google_api(self, api_key, q=None, cid=None):
|
||||
url = None
|
||||
if q:
|
||||
params = {'query': q, 'key': api_key}
|
||||
url = f"https://maps.googleapis.com/maps/api/place/textsearch/json?{urllib.parse.urlencode(params)}"
|
||||
elif cid:
|
||||
params = {'cid': cid, 'fields': 'geometry,name', 'key': api_key}
|
||||
url= f"https://maps.googleapis.com/maps/api/place/details/json?{urllib.parse.urlencode(params)}"
|
||||
else:
|
||||
return None
|
||||
|
||||
result = self.get_json(url)
|
||||
if result['status'] == 'OK':
|
||||
place = result.get('results', [result.get('result')])[0]
|
||||
location = place['geometry']['location']
|
||||
name = place['name']
|
||||
return {'name': name, 'coordinates': [str(location['lat']), str(location['lng'])]}
|
||||
else:
|
||||
print(f'{result.get("status", "")}: {result.get("error_message", "")}')
|
||||
return None
|
||||
|
||||
def process_geojson_features(self, content):
|
||||
try:
|
||||
geojson = json.loads(content)
|
||||
except json.JSONDecodeError:
|
||||
raise ValueError(f"The file {self.input_file} is not a valid JSON file.")
|
||||
for feature in geojson['features']:
|
||||
geometry = feature['geometry']
|
||||
coordinates = geometry['coordinates']
|
||||
properties = feature['properties']
|
||||
google_maps_url = properties.get('google_maps_url', '')
|
||||
location = properties.get('location', {})
|
||||
name = None
|
||||
|
||||
# Check for "null island" coordinates [0, 0]
|
||||
# These are a common artifact of Google Maps exports
|
||||
# See https://github.com/organicmaps/organicmaps/pull/8721
|
||||
if coordinates == [0, 0]:
|
||||
parsed_url = urllib.parse.urlparse(google_maps_url)
|
||||
query_params = urllib.parse.parse_qs(parsed_url.query)
|
||||
# Google Maps URLs can contain either a query string parameter 'q', 'cid'
|
||||
q = query_params.get('q', [None])[0]
|
||||
cid = query_params.get('cid', [None])[0]
|
||||
# Sometimes the 'q' parameter is a comma-separated lat long pair
|
||||
if q and ',' in q and all(part.replace('.', '', 1).replace('-', '', 1).isdigit() for part in q.split(',')):
|
||||
coordinates = q.split(',')
|
||||
else:
|
||||
result = self.get_name_and_coordinates_from_google_api(self.api_key, q=q, cid=cid)
|
||||
if result:
|
||||
coordinates = result['coordinates']
|
||||
if 'name' in result:
|
||||
name = result['name']
|
||||
else:
|
||||
print(f"Couldn't extract coordinates from Google Maps. Skipping {q or cid}")
|
||||
|
||||
coord_string = ', '.join(map(str, coordinates)) if coordinates else None
|
||||
# If name was not retrieved from the Google Maps API, then use the name from the location object,
|
||||
# with a fallback to the address, and finally to the coordinates
|
||||
if not name:
|
||||
name = location.get('name') or location.get('address') or coord_string
|
||||
|
||||
description = ""
|
||||
if 'address' in properties:
|
||||
description += f"<b>Address:</b> {location['address']}<br>"
|
||||
if 'date' in properties:
|
||||
description += f"<b>Date bookmarked:</b> {self.convert_timestamp(properties['date'])}<br>"
|
||||
if 'Comment' in properties:
|
||||
description += f"<b>Comment:</b> {properties['Comment']}<br>"
|
||||
if google_maps_url:
|
||||
description += f"<b>Google Maps URL:</b> <a href=\"{google_maps_url}\">{google_maps_url}</a><br>"
|
||||
|
||||
place = {
|
||||
'name': name,
|
||||
'description': description
|
||||
}
|
||||
if coordinates:
|
||||
place['coordinates'] = ','.join(map(str, coordinates))
|
||||
else:
|
||||
place['coordinates'] = '0,0'
|
||||
self.places.append(place)
|
||||
|
||||
def process_csv_features(self, content):
|
||||
csvreader = csv.reader(StringIO(content), delimiter=',')
|
||||
next(csvreader) # skip header
|
||||
for idx, row in enumerate(csvreader):
|
||||
name = row[0]
|
||||
description = row[1]
|
||||
url = row[2]
|
||||
print(f"\rProgress: {idx + 1} Parsing {name}...", end='')
|
||||
try:
|
||||
if url.startswith("https://www.google.com/maps/search/"):
|
||||
coordinates = url.split('/')[-1].split(',')
|
||||
coordinates.reverse()
|
||||
coordinates = ','.join(coordinates)
|
||||
elif url.startswith('https://www.google.com/maps/place/'):
|
||||
ftid = url.split('!1s')[-1]
|
||||
params = {'key': self.api_key, 'fields': 'geometry', 'ftid': ftid}
|
||||
places_url = "https://maps.googleapis.com/maps/api/place/details/json?" \
|
||||
+ urllib.parse.urlencode(params)
|
||||
try:
|
||||
data = self.get_json(places_url)
|
||||
location = data['result']['geometry']['location']
|
||||
coordinates = ','.join([str(location['lng']), str(location['lat'])])
|
||||
except (urllib.error.URLError, KeyError):
|
||||
print(f"Couldn't extract coordinates from Google Maps. Skipping {name}")
|
||||
continue
|
||||
else:
|
||||
print(f"Couldn't parse url. Skipping {name}")
|
||||
continue
|
||||
|
||||
self.places.append({'name': name, 'description': description, 'coordinates': coordinates})
|
||||
except Exception:
|
||||
print(f"Couldn't parse {name}: {traceback.format_exc()}")
|
||||
|
||||
def write_kml(self):
|
||||
root = ET.Element("kml")
|
||||
doc = ET.SubElement(root, "Document")
|
||||
for place in self.places:
|
||||
placemark = ET.SubElement(doc, "Placemark")
|
||||
ET.SubElement(placemark, "name").text = place['name']
|
||||
ET.SubElement(placemark, "description").text = place['description']
|
||||
point = ET.SubElement(placemark, "Point")
|
||||
ET.SubElement(point, "coordinates").text = place['coordinates']
|
||||
tree = ET.ElementTree(root)
|
||||
tree.write(self.output_file)
|
||||
|
||||
def write_gpx(self):
|
||||
gpx = ET.Element("gpx", version="1.1", creator="GoogleMapsConverter")
|
||||
for place in self.places:
|
||||
wpt = ET.SubElement(gpx, "wpt", lat=place['coordinates'].split(',')[1], lon=place['coordinates'].split(',')[0])
|
||||
ET.SubElement(wpt, "name").text = place['name']
|
||||
ET.SubElement(wpt, "desc").text = place['description']
|
||||
tree = ET.ElementTree(gpx)
|
||||
tree.write(self.output_file)
|
||||
|
||||
def convert(self):
|
||||
with open(self.input_file, 'r') as file:
|
||||
content = file.read().strip()
|
||||
if not content:
|
||||
raise ValueError(f"The file {self.input_file} is empty or not a valid JSON file.")
|
||||
|
||||
mime_type, _ = mimetypes.guess_type(self.input_file)
|
||||
if mime_type == 'application/geo+json' or mime_type == 'application/json':
|
||||
self.process_geojson_features(content)
|
||||
elif mime_type == 'text/csv':
|
||||
self.process_csv_features(content)
|
||||
else:
|
||||
raise ValueError(f"Unsupported file format: {self.input_file}")
|
||||
|
||||
# Write to output file in the desired format, KML or GPX
|
||||
if self.output_format == 'kml':
|
||||
self.write_kml()
|
||||
elif self.output_format == 'gpx':
|
||||
self.write_gpx()
|
||||
print("Exported Google Saved Places to " + path.abspath(self.output_file))
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Convert Google Maps saved places to KML or GPX.")
|
||||
parser.add_argument('--input', help="Path to the file")
|
||||
parser.add_argument('--format', choices=['kml', 'gpx'], default='gpx', help="Output format: 'kml' or 'gpx'")
|
||||
parser.add_argument('--bookmark_list_name', help="Name of the bookmark list")
|
||||
parser.add_argument('--api_key', help="API key for Google Places API")
|
||||
args = parser.parse_args()
|
||||
|
||||
converter = GoogleMapsConverter(
|
||||
input_file=args.input,
|
||||
output_format=args.format,
|
||||
bookmark_list_name=args.bookmark_list_name,
|
||||
api_key=args.api_key
|
||||
)
|
||||
converter.convert()
|
||||
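Besides the interactive CLI above, the converter can be driven programmatically. A hedged sketch (file path, list name, and API key are placeholders):

```python
# Hedged example: non-interactive conversion of a Takeout CSV to GPX.
from google_maps_bookmarks import GoogleMapsConverter

converter = GoogleMapsConverter(
    input_file="Takeout/Saved/Want to go.csv",   # placeholder path from your export
    output_format="gpx",                          # or "kml"
    bookmark_list_name="Want to go",              # output becomes "Want to go.gpx"
    api_key="YOUR_GOOGLE_PLACES_API_KEY",         # placeholder
)
converter.convert()
```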
168
tools/python/ios_simulator_load_gpx.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
GPX to iOS Simulator simctl location command
|
||||
|
||||
Converts a GPX file to simctl location start command for realistic iOS location simulation.
|
||||
|
||||
Tested with CoMaps exported tracks
|
||||
|
||||
Usage:
|
||||
python gpx_to_simctl.py test_route.gpx
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
def extract_track_points_from_gpx(gpx_file: Path):
|
||||
"""Extract track points from GPX file."""
|
||||
tree = ET.parse(gpx_file)
|
||||
root = tree.getroot()
|
||||
|
||||
points = []
|
||||
# Find all elements with lat/lon attributes
|
||||
for elem in root.findall('.//*[@lat][@lon]'):
|
||||
lat = float(elem.get('lat'))
|
||||
lon = float(elem.get('lon'))
|
||||
points.append((lat, lon))
|
||||
|
||||
return points
|
||||
|
||||
def generate_simctl_command(points, speed_kmh=60, interval=0.1, distance=None, device="booted"):
|
||||
"""Generate simctl location start command."""
|
||||
if len(points) < 2:
|
||||
raise ValueError("Need at least 2 waypoints for simctl location start")
|
||||
|
||||
# Convert km/h to m/s
|
||||
speed_mps = speed_kmh / 3.6
|
||||
|
||||
# Format waypoints as lat,lon pairs
|
||||
waypoint_strings = [f"{lat:.6f},{lon:.6f}" for lat, lon in points]
|
||||
|
||||
# Build command
|
||||
cmd = ["xcrun", "simctl", "location", device, "start"]
|
||||
cmd.append(f"--speed={speed_mps:.2f}")
|
||||
|
||||
if distance:
|
||||
cmd.append(f"--distance={distance}")
|
||||
else:
|
||||
cmd.append(f"--interval={interval}")
|
||||
|
||||
cmd.extend(waypoint_strings)
|
||||
|
||||
return cmd
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Convert GPX file to simctl location start command",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
python gpx_to_simctl.py test_route.gpx --speed 60 --interval 0.1
|
||||
python gpx_to_simctl.py test_route.gpx --speed 80 --distance 10 --clear-first
|
||||
python gpx_to_simctl.py test_route.gpx --speed 50 --dry-run
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument('gpx_file', help='Input GPX file')
|
||||
parser.add_argument('--speed', type=float, default=60,
|
||||
help='Speed in km/h (default: 60)')
|
||||
parser.add_argument('--interval', type=float, default=0.1,
|
||||
help='Update interval in seconds (default: 0.1)')
|
||||
parser.add_argument('--distance', type=float,
|
||||
help='Update distance in meters (overrides --interval)')
|
||||
parser.add_argument('--device', default='booted',
|
||||
help='Target device (default: booted)')
|
||||
parser.add_argument('--dry-run', action='store_true',
|
||||
help='Show command without executing (default: execute)')
|
||||
parser.add_argument('--clear-first', action='store_true',
|
||||
help='Clear existing location before starting')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate input file
|
||||
gpx_file = Path(args.gpx_file)
|
||||
if not gpx_file.exists():
|
||||
print(f"Error: GPX file '{gpx_file}' not found", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
# Extract waypoints
|
||||
points = extract_track_points_from_gpx(gpx_file)
|
||||
print(f"Extracted {len(points)} waypoints from {gpx_file}")
|
||||
|
||||
if len(points) < 2:
|
||||
print("Error: Need at least 2 waypoints for location simulation", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Generate command
|
||||
cmd = generate_simctl_command(
|
||||
points,
|
||||
speed_kmh=args.speed,
|
||||
interval=args.interval,
|
||||
distance=args.distance,
|
||||
device=args.device
|
||||
)
|
||||
|
||||
# Show command
|
||||
print(f"\nGenerated simctl command:")
|
||||
print(" ".join(cmd))
|
||||
|
||||
# Calculate simulation info
|
||||
speed_mps = args.speed / 3.6
|
||||
total_distance = 0
|
||||
for i in range(1, len(points)):
|
||||
lat1, lon1 = points[i-1]
|
||||
lat2, lon2 = points[i]
|
||||
# Simple distance approximation
|
||||
total_distance += ((lat2-lat1)**2 + (lon2-lon1)**2)**0.5 * 111000 # rough conversion to meters
|
||||
|
||||
duration = total_distance / speed_mps
|
||||
print(f"\nSimulation info:")
|
||||
print(f" Speed: {args.speed} km/h ({speed_mps:.1f} m/s)")
|
||||
print(f" Waypoints: {len(points)}")
|
||||
print(f" Estimated distance: {total_distance/1000:.2f} km")
|
||||
print(f" Estimated duration: {duration:.0f} seconds ({duration/60:.1f} minutes)")
|
||||
if args.distance:
|
||||
print(f" Update distance: {args.distance}m")
|
||||
else:
|
||||
print(f" Update interval: {args.interval}s")
|
||||
|
||||
# Execute by default unless dry-run
|
||||
if args.dry_run:
|
||||
print(f"\n[DRY RUN] Command that would be executed:")
|
||||
print(f" {' '.join(cmd)}")
|
||||
if args.clear_first:
|
||||
clear_cmd = ["xcrun", "simctl", "location", args.device, "clear"]
|
||||
print(f" (would clear location first: {' '.join(clear_cmd)})")
|
||||
else:
|
||||
print(f"\nExecuting command...")
|
||||
|
||||
# Clear location first if requested
|
||||
if args.clear_first:
|
||||
clear_cmd = ["xcrun", "simctl", "location", args.device, "clear"]
|
||||
print("Clearing existing location...")
|
||||
subprocess.run(clear_cmd, check=True)
|
||||
|
||||
# Execute the start command
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
|
||||
if result.returncode == 0:
|
||||
print("✅ Location simulation started successfully!")
|
||||
if result.stdout.strip():
|
||||
print(result.stdout.strip())
|
||||
else:
|
||||
print(f"❌ Error executing command:")
|
||||
print(result.stderr.strip())
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
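A quick sketch of the command the helper above builds for two waypoints (coordinates are arbitrary examples):

```python
# Hedged example: generate (but do not run) a simctl command for two points.
from ios_simulator_load_gpx import generate_simctl_command

points = [(48.8584, 2.2945), (48.8606, 2.3376)]  # example coordinates in Paris
cmd = generate_simctl_command(points, speed_kmh=36, interval=0.5)
print(" ".join(cmd))
# Expected output:
# xcrun simctl location booted start --speed=10.00 --interval=0.5 48.858400,2.294500 48.860600,2.337600
```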
68
tools/python/maps_generator/CDN_SETUP_INSTRUCTIONS.md
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
Edit the rclone conf secret for Codeberg Actions to deliver maps to e.g. /var/www/html/maps/251231 via a limited user.
|
||||
|
||||
apt update
|
||||
apt install nginx vim
|
||||
|
||||
### set hostname for ssh sanity (will show in console upon next bash launch):
|
||||
vim /etc/hostname
|
||||
hostname cdn-XX-1
|
||||
|
||||
### for SSL:
|
||||
sudo snap install --classic certbot
|
||||
sudo certbot --nginx
|
||||
|
||||
### remove IPs from logging on line ~36:
|
||||
vim /etc/nginx/nginx.conf
|
||||
|
||||
```
|
||||
##
|
||||
# Logging Settings
|
||||
##
|
||||
log_format comaps '0.0.0.0 - - [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"';
|
||||
access_log /var/log/nginx/access.log comaps;
|
||||
```
|
||||
|
||||
### set up monitoring:
|
||||
apt install goaccess
|
||||
edit /etc/goaccess/goaccess.conf and uncomment time-format %H:%M:%S, date-format %Y-%m-%d, log-format COMBINED
|
||||
vim /etc/crontab
|
||||
|
||||
`*/5 * * * * root /usr/bin/goaccess /var/log/nginx/access.log -o /var/www/html/monitor.html`
|
||||
|
||||
### set up basic http pages/responses:
|
||||
cd /var/www/html/
|
||||
mkdir maps
|
||||
rm index.nginx-debian.html
|
||||
wget https://www.comaps.app/favicon.ico
|
||||
vim robots.txt
|
||||
|
||||
```
|
||||
User-agent: *
|
||||
Disallow: /
|
||||
```
|
||||
|
||||
vim index.html
|
||||
|
||||
```
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<title>CoMaps CDN</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>This is a CDN for <a href="https://comaps.app">CoMaps</a></h1>
|
||||
|
||||
<h2>Resources:</h2>
|
||||
<ol>
|
||||
<li>CoMaps <a href="https://cdn.comaps.app/subway/">subway validator</a></li>
|
||||
<li>CoMaps <a href="https://comaps.app/news/">News</a></li>
|
||||
<li><a href="https://comaps.app/donate/">Donate</a></li>
|
||||
</ol>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
20
tools/python/maps_generator/FRENCH_LIBRARY_INSTRUCTIONS.md
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# French National Library Archiving
|
||||
|
||||
The library has taken an interest in archiving CoMaps and its data as a snapshot
|
||||
of our world and the way people interact with maps, in a way that doesn't rely on
|
||||
maintaining servers etc. (With an APK and MWM files and some copy-paste, you can
|
||||
reproduce our app on an emulator etc.)
|
||||
|
||||
## Instructions
|
||||
|
||||
Every 6 months or so, @jeanbaptisteC may ask to upload the most recent map version
|
||||
and a custom APK with bundled World map (googleRelease) with production keys (like web release).
|
||||
|
||||
Credentials for `frlibrary` are in the mapgen rclone, or in zyphlar/pastk's password managers.
|
||||
|
||||
To upload (modify dates accordingly):
|
||||
|
||||
```
|
||||
rclone copy CoMaps-25110702-google-release.apk frlibrary:/apk/
|
||||
rclone copy 251104 frlibrary:/maps/251104
|
||||
```
|
||||
186
tools/python/maps_generator/README.md
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
# maps_generator
|
||||
|
||||
`maps_generator` is the Python CLI for generating `.mwm` maps for CoMaps. This tool functions as the driver for the `generator_tool` C++ executable.
|
||||
|
||||
**Use the `generator_tool` and application from the same release. The application does not support
|
||||
maps built by a generator_tool newer than the app.**
|
||||
|
||||
## What are maps?
|
||||
|
||||
Maps are `.mwm` binary files with special meta-information for rendering, searching, routing, and other use cases.
|
||||
Files from [data/borders](https://codeberg.org/comaps/comaps/src/branch/main/data/borders) define map boundaries for each individual file. The world is segmented into separate files by these boundaries, with the intent of having manageably small files to download. These files are referred to as *maps* or *countries*. A country refers to one of these files, not necessarily a geographic country. Also note that there are two special countries called *World* and *WorldCoasts*. These are small simplified maps of the world and coastlines (sea and ocean watercover) used when other maps have not yet been downloaded.
|
||||
|
||||
## Setup
|
||||
|
||||
You must have Python version >= 3.7 and complete the following steps:
|
||||
|
||||
1. Switch to the branch of your app's version (see the note in the #maps_generator section). E.g.:
|
||||
|
||||
```sh
|
||||
git checkout 2023.06.04-13-android
|
||||
```
|
||||
|
||||
The app version can be found in the "About" section of CoMaps.
|
||||
|
||||
2. Build the `generator_tool` binary (run from the root of the repo):
|
||||
|
||||
```sh
|
||||
./tools/unix/build_omim.sh -r generator_tool
|
||||
./tools/unix/build_omim.sh -r world_roads_builder_tool
|
||||
./tools/unix/build_omim.sh -r mwm_diff_tool
|
||||
```
|
||||
|
||||
3. Go to the `python` directory:
|
||||
|
||||
```sh
|
||||
cd tools/python/
|
||||
```
|
||||
|
||||
4. Install python dependencies:
|
||||
|
||||
```sh
|
||||
pip install -r maps_generator/requirements_dev.txt
|
||||
```
|
||||
|
||||
5. Create a [configuration file with defaults](https://codeberg.org/comaps/comaps/src/branch/main/tools/python/maps_generator/var/etc/map_generator.ini.default):
|
||||
|
||||
```sh
|
||||
cp maps_generator/var/etc/map_generator.ini.default maps_generator/var/etc/map_generator.ini
|
||||
```
|
||||
|
||||
6. Read through and edit the configuration file.
|
||||
|
||||
Ensure that `OMIM_PATH` is set correctly.
|
||||
The default `PLANET_URL` setting makes the generator download an OpenStreetMap dump file for North Macedonia from [Geofabrik](http://download.geofabrik.de/index.html). Change `PLANET_URL` and `PLANET_MD5_URL` to get the region you want.
|
||||
|
||||
## Basic Usage
|
||||
|
||||
Make sure you are in the `tools/python` repo directory before starting the generator.
|
||||
|
||||
```sh
|
||||
cd tools/python
|
||||
```
|
||||
|
||||
Build a `.mwm` map file for North Macedonia without using coastlines (it's a land-locked country anyway):
|
||||
```sh
|
||||
python3 -m maps_generator --countries="Macedonia" --skip="Coastline"
|
||||
```
|
||||
|
||||
It's possible to skip coastlines for countries that have a sea coast too, but the sea water will not be rendered in that case.
|
||||
|
||||
Make sure that you specify country names that are actually contained in your pbf file, or you'll get errors in the next step. Check the filenames in the `data/borders/` folder (without the `.poly` extension) for a list of all valid country names. For example, New York City is in `US_New York_New_York` and all of England (minus Ireland, Scotland, and Wales) can be generated by specifying `UK_England_*`.
|
||||
|
||||
To see other possible command-line options:
|
||||
```sh
|
||||
python3 -m maps_generator -h
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
The general log file (`maps_build/generation.log` by default) contains output of the `maps_generator` Python script only. More detailed logs, including output of the `generator_tool` binary, are located in the `logs/` subdir of a particular build directory, e.g. `maps_build/2023_06_04__20_05_07/logs/`.
|
||||
|
||||
## More Examples
|
||||
|
||||
### Japan with coastlines
|
||||
|
||||
1. Open https://download.geofabrik.de/asia/japan.html and copy the URLs of the osm.pbf and md5sum files.
|
||||
2. Put the urls into the `PLANET_URL` and `PLANET_MD5_URL` settings of the `map_generator.ini` file.
|
||||
3. Set `PLANET_COASTS_URL` to a location with `latest_coasts.geom` and `latest_coasts.rawgeom` files. You don't need to download these files if the whole planet is built. They are generated in the process of building the whole planet (the coastline should be valid and continuous for it to succeed).
|
||||
4. Run
|
||||
|
||||
```sh
|
||||
python3 -m maps_generator --countries="World, WorldCoasts, Japan_*"
|
||||
```
|
||||
|
||||
### Rebuild stages
|
||||
|
||||
For example, suppose you changed the routing code in the project and want to regenerate the maps.
|
||||
You must have a previous generation available. You can then regenerate starting from the routing stage, and only for two mwms:
|
||||
|
||||
```sh
|
||||
python3 -m maps_generator -c --from_stage="Routing" --countries="Japan_Kinki Region_Osaka_Osaka, Japan_Chugoku Region_Tottori"
|
||||
```
|
||||
|
||||
### Custom maps from GeoJSON
|
||||
|
||||
If you have an OSM PBF file and want to cut custom map regions, you can use a polygon feature in a GeoJSON file. This is a useful alternative if you want a custom area, or if you do not want to figure out which country (or countries) applies to the area you need.
|
||||
|
||||
1. If you don't already have the .osm.pbf file, download the applicable area of the world in .osm.pbf format, for example from [Geofabrik](http://download.geofabrik.de/index.html).
|
||||
2. Generate a GeoJSON file for the territory you are interested in. You can do it via [geojson.io](http://geojson.io/). Select the area on the map and copy the corresponding part of the resulting GeoJSON. You need to copy the contents of `features: [ { ... } ]`, without the `features` array itself, but with the inner braces: `{...}`. For example, here is the full GeoJSON of a rectangular area around Melbourne:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": {},
|
||||
"geometry": {
|
||||
"type": "Polygon",
|
||||
"coordinates": [
|
||||
[
|
||||
[143.75610351562497, -39.21523130910491],
|
||||
[147.98583984375, -39.21523130910491],
|
||||
[147.98583984375, -36.03133177633187],
|
||||
[143.75610351562497, -36.03133177633187],
|
||||
[143.75610351562497, -39.21523130910491]
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
You need to copy this part of the geojson:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": {},
|
||||
"geometry": {
|
||||
"type": "Polygon",
|
||||
"coordinates": [
|
||||
[
|
||||
[143.75610351562497, -39.21523130910491],
|
||||
[147.98583984375, -39.21523130910491],
|
||||
[147.98583984375, -36.03133177633187],
|
||||
[143.75610351562497, -36.03133177633187],
|
||||
[143.75610351562497, -39.21523130910491]
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
3. Save the selected GeoJSON to a file with the `.geojson` extension, for example `borders.geojson` (a small helper sketch for extracting the feature programmatically is shown after this list).
|
||||
4. Extract this area from the .osm.pbf file with the help of the [osmium tool](https://osmcode.org/osmium-tool/):
|
||||
|
||||
```
|
||||
osmium extract -p borders.geojson germany-latest.osm.pbf -o germany_part.osm.pbf
|
||||
```
|
||||
|
||||
5. Run the `maps_generator` tool:
|
||||
|
||||
```sh
|
||||
python3 -m maps_generator --skip="Coastline" --without_countries="World*"
|
||||
```
|
||||
|
||||
In this example we skipped generation of the World\* files because they are among the most time- and resource-consuming mwms.
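As referenced in step 3 above, here is a minimal sketch that pulls the first feature out of a FeatureCollection (e.g. pasted from geojson.io) and writes it to `borders.geojson`; the input filename is a placeholder:

```python
# Hedged helper: extract the first feature from a FeatureCollection and save it
# as the single-feature GeoJSON file described in steps 2-3.
import json

with open("melbourne_collection.geojson") as f:   # placeholder: full FeatureCollection
    collection = json.load(f)

feature = collection["features"][0]               # the {...} part described above

with open("borders.geojson", "w") as f:
    json.dump(feature, f, indent=2)
```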
|
||||
|

### Subways layer

You can manually generate a subway layer file to use in the `SUBWAY_URL` ini setting. See [instructions](https://codeberg.org/comaps/comaps/src/branch/main/docs/SUBWAY_GENERATION.md).

## Testing maps

If you're testing a new feature, you will likely want to test the maps locally.

### iOS

The easiest way is to use the Simulator and swap out the map file in the app's Documents folder.

Finding the folder is slightly tricky; the easiest way is to look in the Xcode debug message window, as it often prints messages that contain the Documents folder. E.g.,

```
I(1) 0.11666 platform/string_storage_base.cpp:24 StringStorageBase(): Settings path: /Users/<user-name>/Library/Developer/CoreSimulator/Devices/EFE74BF2-2871-4364-A633-BC8F1BAB9DF3/data/Containers/Data/Application/252BDFA5-3E60-43A6-B09C-158BC55DC450/Documents/settings.ini
```

In this folder the map file is in a YYMMDD subfolder.
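
Alternatively, you can ask the Simulator for the app's data container directly with `xcrun simctl`; the bundle identifier below is a placeholder, so substitute the one your debug build actually installs:

```sh
# Prints the path of the app's data container; Documents/ with the
# YYMMDD map subfolder lives directly inside it.
xcrun simctl get_app_container booted app.example.maps.debug data
```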
19
tools/python/maps_generator/__init__.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
import os
|
||||
|
||||
from maps_generator.generator import settings
|
||||
|
||||
CONFIG_PATH = os.path.join(
|
||||
    os.path.dirname(os.path.realpath(__file__)),
|
||||
"var",
|
||||
"etc",
|
||||
"map_generator.ini",
|
||||
)
|
||||
|
||||
print(f"Loading configuration from {CONFIG_PATH}")
|
||||
|
||||
settings.init(CONFIG_PATH)
|
||||
|
||||
from maps_generator.generator import stages_declaration
|
||||
from maps_generator.generator.stages import stages
|
||||
|
||||
stages.init()
|
||||
251
tools/python/maps_generator/__main__.py
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
import logging
|
||||
import os
|
||||
from argparse import ArgumentParser
|
||||
from argparse import RawDescriptionHelpFormatter
|
||||
|
||||
from maps_generator.generator import settings
|
||||
from maps_generator.generator import stages
|
||||
from maps_generator.generator import stages_declaration as sd
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.generator.env import PathProvider
|
||||
from maps_generator.generator.env import WORLDS_NAMES
|
||||
from maps_generator.generator.env import find_last_build_dir
|
||||
from maps_generator.generator.env import get_all_countries_list
|
||||
from maps_generator.generator.exceptions import ContinueError
|
||||
from maps_generator.generator.exceptions import SkipError
|
||||
from maps_generator.generator.exceptions import ValidationError
|
||||
from maps_generator.maps_generator import generate_coasts
|
||||
from maps_generator.maps_generator import generate_maps
|
||||
from maps_generator.utils.algo import unique
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
||||
def parse_options():
|
||||
parser = ArgumentParser(
|
||||
description="A tool to generate map files in Organic Maps' .mwm format.",
|
||||
epilog="See maps_generator/README.md for setup instructions and usage examples.",
|
||||
formatter_class=RawDescriptionHelpFormatter,
|
||||
parents=[settings.parser],
|
||||
)
|
||||
parser.add_argument(
|
||||
"-c",
|
||||
"--continue",
|
||||
default="",
|
||||
nargs="?",
|
||||
type=str,
|
||||
help="Continue the last build or the one specified in CONTINUE from the "
|
||||
"last stopped stage.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-s",
|
||||
"--suffix",
|
||||
default="",
|
||||
type=str,
|
||||
help="Suffix of the name of a build directory.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--countries",
|
||||
type=str,
|
||||
default="",
|
||||
help="List of countries/regions, separated by a comma or a semicolon, or a path to "
|
||||
"a file with a newline-separated list of regions, for which maps "
|
||||
"should be built. Filenames in data/borders/ (without the .poly extension) "
|
||||
"represent all valid region names. "
|
||||
"A * wildcard is accepted, e.g. --countries=\"UK*\" will match "
|
||||
"UK_England_East Midlands, UK_England_East of England_Essex, etc.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--without_countries",
|
||||
type=str,
|
||||
default="",
|
||||
help="List of countries/regions to exclude from generation. "
|
||||
"Has a priority over --countries and uses the same syntax.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skip",
|
||||
type=str,
|
||||
default="",
|
||||
help=f"List of stages, separated by a comma or a semicolon, "
|
||||
f"for which building will be skipped. Available skip stages: "
|
||||
f"{', '.join([s.replace('stage_', '') for s in stages.stages.get_visible_stages_names()])}.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--from_stage",
|
||||
type=str,
|
||||
default="",
|
||||
help=f"Stage from which maps will be rebuild. Available stages: "
|
||||
f"{', '.join([s.replace('stage_', '') for s in stages.stages.get_visible_stages_names()])}.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--coasts",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Build only WorldCoasts.raw and WorldCoasts.rawgeom files.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--force_download_files",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If build is continued, files will always be downloaded again.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--production",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Build production maps. Otherwise 'OSM-data-only maps' are built "
|
||||
"without additional data like SRTM.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--order",
|
||||
type=str,
|
||||
default=os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)),
|
||||
"var/etc/file_generation_order.txt",
|
||||
),
|
||||
help="Mwm generation order, useful to have particular maps completed first "
|
||||
"in a long build (defaults to maps_generator/var/etc/file_generation_order.txt "
|
||||
"to process big countries first).",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
root = logging.getLogger()
|
||||
root.addHandler(logging.NullHandler())
|
||||
|
||||
options = parse_options()
|
||||
|
||||
# Processing of 'continue' option.
|
||||
# If 'continue' is set maps generation is continued from the last build
|
||||
# that is found automatically.
|
||||
build_name = None
|
||||
continue_ = getattr(options, "continue")
|
||||
if continue_ is None or continue_:
|
||||
d = find_last_build_dir(continue_)
|
||||
if d is None:
|
||||
raise ContinueError(
|
||||
"The build cannot continue: the last build directory was not found."
|
||||
)
|
||||
build_name = d
|
||||
|
||||
countries_line = ""
|
||||
without_countries_line = ""
|
||||
if "COUNTRIES" in os.environ:
|
||||
countries_line = os.environ["COUNTRIES"]
|
||||
if options.countries:
|
||||
countries_line = options.countries
|
||||
else:
|
||||
countries_line = "*"
|
||||
|
||||
if options.without_countries:
|
||||
without_countries_line = options.without_countries
|
||||
|
||||
all_countries = get_all_countries_list(PathProvider.borders_path())
|
||||
|
||||
def end_star_compare(prefix, full):
|
||||
return full.startswith(prefix)
|
||||
|
||||
def compare(a, b):
|
||||
return a == b
|
||||
|
||||
def get_countries_set_from_line(line):
|
||||
countries = []
|
||||
used_countries = set()
|
||||
countries_list = []
|
||||
if os.path.isfile(line):
|
||||
with open(line) as f:
|
||||
countries_list = [x.strip() for x in f]
|
||||
elif line:
|
||||
countries_list = [x.strip() for x in line.replace(";", ",").split(",")]
|
||||
|
||||
for country_item in countries_list:
|
||||
cmp = compare
|
||||
_raw_country = country_item[:]
|
||||
if _raw_country and _raw_country[-1] == "*":
|
||||
_raw_country = _raw_country.replace("*", "")
|
||||
cmp = end_star_compare
|
||||
|
||||
for country in all_countries:
|
||||
if cmp(_raw_country, country):
|
||||
used_countries.add(country_item)
|
||||
countries.append(country)
|
||||
|
||||
countries = unique(countries)
|
||||
diff = set(countries_list) - used_countries
|
||||
if diff:
|
||||
raise ValidationError(f"Bad input countries: {', '.join(diff)}")
|
||||
return set(countries)
|
||||
|
||||
countries = get_countries_set_from_line(countries_line)
|
||||
without_countries = get_countries_set_from_line(without_countries_line)
|
||||
countries -= without_countries
|
||||
countries = list(countries)
|
||||
if not countries:
|
||||
countries = all_countries
|
||||
|
||||
# Processing of 'order' option.
|
||||
# It defines an order of countries generation using a file from 'order' path.
|
||||
if options.order:
|
||||
ordered_countries = []
|
||||
countries = set(countries)
|
||||
with open(options.order) as file:
|
||||
for c in file:
|
||||
if c.strip().startswith("#"):
|
||||
continue
|
||||
c = c.split("\t")[0].strip()
|
||||
if c in countries:
|
||||
ordered_countries.append(c)
|
||||
countries.remove(c)
|
||||
if countries:
|
||||
raise ValueError(
|
||||
f"{options.order} does not have an order " f"for {countries}."
|
||||
)
|
||||
countries = ordered_countries
|
||||
|
||||
# Processing of 'skip' option.
|
||||
skipped_stages = set()
|
||||
if options.skip:
|
||||
for s in options.skip.replace(";", ",").split(","):
|
||||
stage = s.strip()
|
||||
if not stages.stages.is_valid_stage_name(stage):
|
||||
raise SkipError(f"{stage} not found.")
|
||||
skipped_stages.add(stages.get_stage_type(stage))
|
||||
|
||||
if settings.PLANET_URL != settings.DEFAULT_PLANET_URL:
|
||||
skipped_stages.add(sd.StageUpdatePlanet)
|
||||
|
||||
if sd.StageCoastline in skipped_stages:
|
||||
if any(x in WORLDS_NAMES for x in options.countries):
|
||||
raise SkipError(
|
||||
f"You can not skip {stages.get_stage_name(sd.StageCoastline)}"
|
||||
f" if you want to generate {WORLDS_NAMES}."
|
||||
f" You can exclude them with --without_countries option."
|
||||
)
|
||||
|
||||
if not settings.NEED_PLANET_UPDATE:
|
||||
skipped_stages.add(sd.StageUpdatePlanet)
|
||||
|
||||
if not settings.NEED_BUILD_WORLD_ROADS:
|
||||
skipped_stages.add(sd.StagePrepareRoutingWorld)
|
||||
skipped_stages.add(sd.StageRoutingWorld)
|
||||
|
||||
# Make env and run maps generation.
|
||||
env = Env(
|
||||
countries=countries,
|
||||
production=options.production,
|
||||
build_name=build_name,
|
||||
build_suffix=options.suffix,
|
||||
skipped_stages=skipped_stages,
|
||||
force_download_files=options.force_download_files
|
||||
)
|
||||
from_stage = None
|
||||
if options.from_stage:
|
||||
from_stage = f"{options.from_stage}"
|
||||
if options.coasts:
|
||||
generate_coasts(env, from_stage)
|
||||
else:
|
||||
generate_maps(env, from_stage)
|
||||
env.finish()
|
||||
|
||||
|
||||
main()
|
||||
60
tools/python/maps_generator/check_logs.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import argparse
|
||||
import sys
|
||||
|
||||
from maps_generator.checks.default_check_set import CheckType
|
||||
from maps_generator.checks.default_check_set import LogsChecks
|
||||
from maps_generator.checks.default_check_set import get_logs_check_sets_and_filters
|
||||
from maps_generator.checks.default_check_set import run_checks_and_print_results
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="This script checks maps generation logs and prints results."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--old", type=str, required=True, help="Path to old logs directory.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--new", type=str, required=True, help="Path to new logs directory.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--checks",
|
||||
action="store",
|
||||
type=str,
|
||||
nargs="*",
|
||||
default=None,
|
||||
help=f"Set of checks: {', '.join(c.name for c in LogsChecks)}. "
|
||||
f"By default, all checks will run.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--level",
|
||||
type=str,
|
||||
required=False,
|
||||
choices=("low", "medium", "hard", "strict"),
|
||||
default="medium",
|
||||
help="Messages level.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
type=str,
|
||||
required=False,
|
||||
default="",
|
||||
help="Path to output file. stdout by default.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
args = get_args()
|
||||
|
||||
checks = {LogsChecks[c] for c in args.checks} if args.checks is not None else None
|
||||
s = get_logs_check_sets_and_filters(args.old, args.new, checks)
|
||||
run_checks_and_print_results(
|
||||
s,
|
||||
CheckType[args.level],
|
||||
file=open(args.output, "w") if args.output else sys.stdout,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
65
tools/python/maps_generator/check_mwms.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
import argparse
|
||||
import sys
|
||||
|
||||
from maps_generator.checks.default_check_set import CheckType
|
||||
from maps_generator.checks.default_check_set import MwmsChecks
|
||||
from maps_generator.checks.default_check_set import get_mwm_check_sets_and_filters
|
||||
from maps_generator.checks.default_check_set import run_checks_and_print_results
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="This script checks mwms and prints results."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--old", type=str, required=True, help="Path to old mwm directory.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--new", type=str, required=True, help="Path to new mwm directory.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--categories", type=str, required=True, help="Path to categories file.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--checks",
|
||||
action="store",
|
||||
type=str,
|
||||
nargs="*",
|
||||
default=None,
|
||||
help=f"Set of checks: {', '.join(c.name for c in MwmsChecks)}. "
|
||||
f"By default, all checks will run.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--level",
|
||||
type=str,
|
||||
required=False,
|
||||
choices=("low", "medium", "hard", "strict"),
|
||||
default="medium",
|
||||
help="Messages level.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
type=str,
|
||||
required=False,
|
||||
default="",
|
||||
help="Path to output file. stdout by default.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
args = get_args()
|
||||
|
||||
checks = {MwmsChecks[c] for c in args.checks} if args.checks else None
|
||||
s = get_mwm_check_sets_and_filters(
|
||||
args.old, args.new, checks, categories_path=args.categories
|
||||
)
|
||||
run_checks_and_print_results(
|
||||
s,
|
||||
CheckType[args.level],
|
||||
file=open(args.output, "w") if args.output else sys.stdout,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
0
tools/python/maps_generator/checks/__init__.py
Normal file
309
tools/python/maps_generator/checks/check.py
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
import os
|
||||
import sys
|
||||
from abc import ABC
|
||||
from abc import abstractmethod
|
||||
from collections import namedtuple
|
||||
from enum import Enum
|
||||
from functools import lru_cache
|
||||
from typing import Any
|
||||
from typing import Callable
|
||||
from typing import List
|
||||
|
||||
ResLine = namedtuple("ResLine", ["previous", "current", "diff", "arrow"])
|
||||
|
||||
|
||||
class Arrow(Enum):
|
||||
zero = 0
|
||||
down = 1
|
||||
up = 2
|
||||
|
||||
|
||||
ROW_TO_STR = {
|
||||
Arrow.zero: "◄►",
|
||||
Arrow.down: "▼",
|
||||
Arrow.up: "▲",
|
||||
}
|
||||
|
||||
|
||||
def norm(value):
|
||||
if isinstance(value, (int, float)):
|
||||
return abs(value)
|
||||
elif hasattr(value, "__len__"):
|
||||
return len(value)
|
||||
elif hasattr(value, "norm"):
|
||||
return value.norm()
|
||||
|
||||
assert False, type(value)
|
||||
|
||||
|
||||
def get_rel(r: ResLine) -> float:
|
||||
rel = 0.0
|
||||
if r.arrow != Arrow.zero:
|
||||
prev = norm(r.previous)
|
||||
if prev == 0:
|
||||
rel = 100.0
|
||||
else:
|
||||
rel = norm(r.diff) * 100.0 / prev
|
||||
return rel
|
||||
|
||||
|
||||
class Check(ABC):
|
||||
"""
|
||||
Base class for any checks.
|
||||
Usual flow:
|
||||
|
||||
# Create check object.
|
||||
check = AnyCheck("ExampleCheck")
|
||||
# Do work.
|
||||
check.check()
|
||||
|
||||
# Get results and process them
|
||||
raw_result = check.get_result()
|
||||
process_result(raw_result)
|
||||
|
||||
# or print result
|
||||
check.print()
|
||||
"""
|
||||
def __init__(self, name: str):
|
||||
self.name = name
|
||||
|
||||
def print(self, silent_if_no_results=False, filt=None, file=sys.stdout):
|
||||
s = self.formatted_string(silent_if_no_results, filt)
|
||||
if s:
|
||||
print(s, file=file)
|
||||
|
||||
@abstractmethod
|
||||
def check(self):
|
||||
"""
|
||||
Performs a logic of the check.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_result(self) -> Any:
|
||||
"""
|
||||
Returns a raw result of the check.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def formatted_string(self, silent_if_no_results=False, *args, **kwargs) -> str:
|
||||
"""
|
||||
Returns a formatted string of a raw result of the check.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class CompareCheckBase(Check, ABC):
|
||||
def __init__(self, name: str):
|
||||
super().__init__(name)
|
||||
self.op: Callable[
|
||||
[Any, Any], Any
|
||||
] = lambda previous, current: current - previous
|
||||
self.do: Callable[[Any], Any] = lambda x: x
|
||||
self.zero: Any = 0
|
||||
self.diff_format: Callable[[Any], str] = lambda x: str(x)
|
||||
self.format: Callable[[Any], str] = lambda x: str(x)
|
||||
self.filt: Callable[[Any], bool] = lambda x: True
|
||||
|
||||
def set_op(self, op: Callable[[Any, Any], Any]):
|
||||
self.op = op
|
||||
|
||||
def set_do(self, do: Callable[[Any], Any]):
|
||||
self.do = do
|
||||
|
||||
def set_zero(self, zero: Any):
|
||||
self.zero = zero
|
||||
|
||||
def set_diff_format(self, diff_format: Callable[[Any], str]):
|
||||
self.diff_format = diff_format
|
||||
|
||||
def set_format(self, format: Callable[[Any], str]):
|
||||
self.format = format
|
||||
|
||||
def set_filt(self, filt: Callable[[Any], bool]):
|
||||
self.filt = filt
|
||||
|
||||
|
||||
class CompareCheck(CompareCheckBase):
|
||||
def __init__(
|
||||
self, name: str, old: Any, new: Any,
|
||||
):
|
||||
super().__init__(name)
|
||||
|
||||
self.old = old
|
||||
self.new = new
|
||||
self.result = None
|
||||
|
||||
def set_op(self, op: Callable[[Any, Any], Any]):
|
||||
self.op = op
|
||||
|
||||
def set_do(self, do: Callable[[Any], Any]):
|
||||
self.do = do
|
||||
|
||||
def set_zero(self, zero: Any):
|
||||
self.zero = zero
|
||||
|
||||
def get_result(self) -> ResLine:
|
||||
return self.result
|
||||
|
||||
def check(self):
|
||||
previous = self.do(self.old)
|
||||
if previous is None:
|
||||
return False
|
||||
|
||||
current = self.do(self.new)
|
||||
if current is None:
|
||||
return False
|
||||
|
||||
diff = self.op(previous, current)
|
||||
if diff is None:
|
||||
return False
|
||||
|
||||
arrow = Arrow.zero
|
||||
if diff > self.zero:
|
||||
arrow = Arrow.up
|
||||
elif diff < self.zero:
|
||||
arrow = Arrow.down
|
||||
|
||||
self.result = ResLine(
|
||||
previous=previous, current=current, diff=diff, arrow=arrow
|
||||
)
|
||||
return True
|
||||
|
||||
def formatted_string(self, silent_if_no_results=False, *args, **kwargs) -> str:
|
||||
assert self.result
|
||||
|
||||
if silent_if_no_results and self.result.arrow == Arrow.zero:
|
||||
return ""
|
||||
|
||||
rel = get_rel(self.result)
|
||||
return (
|
||||
f"{self.name}: {ROW_TO_STR[self.result.arrow]} {rel:.2f}% "
|
||||
f"[{self.format(self.result.previous)} → "
|
||||
f"{self.format(self.result.current)}: "
|
||||
f"{self.diff_format(self.result.diff)}]"
|
||||
)
|
||||
|
||||
|
||||
class CompareCheckSet(CompareCheckBase):
|
||||
def __init__(self, name: str):
|
||||
super().__init__(name)
|
||||
|
||||
self.checks = []
|
||||
|
||||
def add_check(self, check: Check):
|
||||
self.checks.append(check)
|
||||
|
||||
def set_op(self, op: Callable[[Any, Any], Any]):
|
||||
for c in self.checks:
|
||||
c.set_op(op)
|
||||
|
||||
def set_do(self, do: Callable[[Any], Any]):
|
||||
for c in self.checks:
|
||||
c.set_do(do)
|
||||
|
||||
def set_zero(self, zero: Any):
|
||||
for c in self.checks:
|
||||
c.set_zero(zero)
|
||||
|
||||
def set_diff_format(self, diff_format: Callable[[Any], str]):
|
||||
for c in self.checks:
|
||||
c.set_diff_format(diff_format)
|
||||
|
||||
def set_format(self, format: Callable[[Any], str]):
|
||||
for c in self.checks:
|
||||
c.set_format(format)
|
||||
|
||||
def check(self):
|
||||
for c in self.checks:
|
||||
c.check()
|
||||
|
||||
def get_result(self,) -> List[ResLine]:
|
||||
return [c.get_result() for c in self._with_result()]
|
||||
|
||||
def formatted_string(self, silent_if_no_results=False, filt=None, _offset=0) -> str:
|
||||
sets = filter(lambda c: isinstance(c, CompareCheckSet), self._with_result())
|
||||
checks = filter(lambda c: isinstance(c, CompareCheck), self._with_result())
|
||||
checks = sorted(checks, key=lambda c: norm(c.get_result().diff), reverse=True)
|
||||
|
||||
if filt is None:
|
||||
filt = self.filt
|
||||
|
||||
checks = filter(lambda c: filt(c.get_result()), checks)
|
||||
|
||||
sets = list(sets)
|
||||
checks = list(checks)
|
||||
|
||||
no_results = not checks and not sets
|
||||
if silent_if_no_results and no_results:
|
||||
return ""
|
||||
|
||||
head = [
|
||||
f"{' ' * _offset}Check set[{self.name}]:",
|
||||
]
|
||||
|
||||
lines = []
|
||||
if no_results:
|
||||
lines.append(f"{' ' * (_offset + 2)}No results.")
|
||||
|
||||
for c in checks:
|
||||
s = c.formatted_string(silent_if_no_results, filt, _offset + 2)
|
||||
if s:
|
||||
lines.append(f"{' ' * (_offset + 2)}{s}")
|
||||
|
||||
for s in sets:
|
||||
s = s.formatted_string(silent_if_no_results, filt, _offset + 2)
|
||||
if s:
|
||||
lines.append(s)
|
||||
|
||||
if not lines:
|
||||
return ""
|
||||
|
||||
head += lines
|
||||
return "\n".join(head) + "\n"
|
||||
|
||||
def _with_result(self):
|
||||
return (c for c in self.checks if c.get_result() is not None)
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def _get_and_check_files(old_path, new_path, ext):
|
||||
files = list(filter(lambda f: f.endswith(ext), os.listdir(old_path)))
|
||||
s = set(files) ^ set(filter(lambda f: f.endswith(ext), os.listdir(new_path)))
|
||||
assert len(s) == 0, s
|
||||
return files
|
||||
|
||||
|
||||
def build_check_set_for_files(
|
||||
name: str,
|
||||
old_path: str,
|
||||
new_path: str,
|
||||
*,
|
||||
ext: str = "",
|
||||
recursive: bool = False,
|
||||
op: Callable[[Any, Any], Any] = lambda previous, current: current - previous,
|
||||
do: Callable[[Any], Any] = lambda x: x,
|
||||
zero: Any = 0,
|
||||
diff_format: Callable[[Any], str] = lambda x: str(x),
|
||||
format: Callable[[Any], str] = lambda x: str(x),
|
||||
):
|
||||
if recursive:
|
||||
raise NotImplementedError(
|
||||
f"CheckSetBuilderForFiles is not implemented for recursive."
|
||||
)
|
||||
|
||||
cs = CompareCheckSet(name)
|
||||
for file in _get_and_check_files(old_path, new_path, ext):
|
||||
cs.add_check(
|
||||
CompareCheck(
|
||||
file, os.path.join(old_path, file), os.path.join(new_path, file)
|
||||
)
|
||||
)
|
||||
|
||||
cs.set_do(do)
|
||||
cs.set_op(op)
|
||||
cs.set_zero(zero)
|
||||
cs.set_diff_format(diff_format)
|
||||
cs.set_format(format)
|
||||
return cs
|
||||
34
tools/python/maps_generator/checks/check_addresses.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import re
|
||||
|
||||
from maps_generator.checks import check
|
||||
from maps_generator.checks.logs import logs_reader
|
||||
|
||||
|
||||
ADDR_PATTERN = re.compile(
|
||||
r".*BuildAddressTable\(\) Address: "
|
||||
r"Matched percent (?P<matched_percent>[0-9.]+) "
|
||||
r"Total: (?P<total>\d+) "
|
||||
r"Missing: (?P<missing>\d+)"
|
||||
)
|
||||
|
||||
|
||||
def get_addresses_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
|
||||
"""
|
||||
Returns an addresses check set, that checks a difference in 'matched_percent'
|
||||
addresses of BuildAddressTable between old logs and new logs.
|
||||
"""
|
||||
def do(path: str):
|
||||
log = logs_reader.Log(path)
|
||||
if not log.is_mwm_log:
|
||||
return None
|
||||
|
||||
found = logs_reader.find_and_parse(log.lines, ADDR_PATTERN)
|
||||
if not found:
|
||||
return None
|
||||
|
||||
d = found[0][0]
|
||||
return float(d["matched_percent"])
|
||||
|
||||
return check.build_check_set_for_files(
|
||||
"Addresses check", old_path, new_path, ext=".log", do=do
|
||||
)
|
||||
58
tools/python/maps_generator/checks/check_categories.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
from collections import defaultdict
|
||||
|
||||
from maps_generator.checks import check
|
||||
from maps_generator.checks.check_mwm_types import count_all_types
|
||||
from mwm import NAME_TO_INDEX_TYPE_MAPPING
|
||||
|
||||
|
||||
def parse_groups(path):
|
||||
groups = defaultdict(set)
|
||||
with open(path) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line.startswith("#"):
|
||||
continue
|
||||
|
||||
if line.startswith("@"):
|
||||
continue
|
||||
|
||||
array = line.split("@", maxsplit=1)
|
||||
if len(array) == 2:
|
||||
types_str, categories = array
|
||||
types_int = {
|
||||
NAME_TO_INDEX_TYPE_MAPPING[t]
|
||||
for t in types_str.strip("|").split("|")
|
||||
}
|
||||
for category in categories.split("|"):
|
||||
category = category.replace("@", "", 1)
|
||||
groups[category].update(types_int)
|
||||
return groups
|
||||
|
||||
|
||||
def get_categories_check_set(
|
||||
old_path: str, new_path: str, categories_path: str
|
||||
) -> check.CompareCheckSet:
|
||||
"""
|
||||
Returns a categories check set, that checks a difference in a number of
|
||||
objects of categories(from categories.txt) between old mwms and new mwms.
|
||||
"""
|
||||
cs = check.CompareCheckSet("Categories check")
|
||||
|
||||
def make_do(indexes):
|
||||
def do(path):
|
||||
all_types = count_all_types(path)
|
||||
return sum(all_types[i] for i in indexes)
|
||||
|
||||
return do
|
||||
|
||||
for category, types in parse_groups(categories_path).items():
|
||||
cs.add_check(
|
||||
check.build_check_set_for_files(
|
||||
f"Category {category} check",
|
||||
old_path,
|
||||
new_path,
|
||||
ext=".mwm",
|
||||
do=make_do(types),
|
||||
)
|
||||
)
|
||||
return cs
|
||||
49
tools/python/maps_generator/checks/check_log_levels.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import logging
|
||||
from functools import lru_cache
|
||||
|
||||
from maps_generator.checks import check
|
||||
from maps_generator.checks.logs import logs_reader
|
||||
from maps_generator.generator.stages_declaration import stages
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def _get_log_stages(path):
|
||||
log = logs_reader.Log(path)
|
||||
return logs_reader.normalize_logs(logs_reader.split_into_stages(log))
|
||||
|
||||
|
||||
def get_log_levels_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
|
||||
"""
|
||||
Returns a log levels check set, that checks a difference in a number of
|
||||
message levels from warning and higher for each stage between old mwms
|
||||
and new mwms.
|
||||
"""
|
||||
cs = check.CompareCheckSet("Log levels check")
|
||||
|
||||
def make_do(level, stage_name, cache={}):
|
||||
def do(path):
|
||||
for s in _get_log_stages(path):
|
||||
if s.name == stage_name:
|
||||
k = f"{path}:{stage_name}"
|
||||
if k not in cache:
|
||||
cache[k] = logs_reader.count_levels(s)
|
||||
|
||||
return cache[k][level]
|
||||
return None
|
||||
|
||||
return do
|
||||
|
||||
for stage_name in (
|
||||
stages.get_visible_stages_names() + stages.get_invisible_stages_names()
|
||||
):
|
||||
for level in (logging.CRITICAL, logging.ERROR, logging.WARNING):
|
||||
cs.add_check(
|
||||
check.build_check_set_for_files(
|
||||
f"Stage {stage_name} - {logging.getLevelName(level)} check",
|
||||
old_path,
|
||||
new_path,
|
||||
ext=".log",
|
||||
do=make_do(level, stage_name),
|
||||
)
|
||||
)
|
||||
return cs
|
||||
61
tools/python/maps_generator/checks/check_mwm_types.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
from collections import defaultdict
|
||||
from functools import lru_cache
|
||||
from typing import Union
|
||||
|
||||
from maps_generator.checks import check
|
||||
from mwm import Mwm
|
||||
from mwm import NAME_TO_INDEX_TYPE_MAPPING
|
||||
from mwm import readable_type
|
||||
from mwm import type_index
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def count_all_types(path: str):
|
||||
c = defaultdict(int)
|
||||
for ft in Mwm(path, parse=False):
|
||||
for t in ft.types():
|
||||
c[t] += 1
|
||||
return c
|
||||
|
||||
|
||||
def get_mwm_type_check_set(
|
||||
old_path: str, new_path: str, type_: Union[str, int]
|
||||
) -> check.CompareCheckSet:
|
||||
"""
|
||||
Returns a mwm type check set, that checks a difference in a number of
|
||||
type [type_] between old mwms and new mwms.
|
||||
"""
|
||||
if isinstance(type_, str):
|
||||
type_ = type_index(type_)
|
||||
assert type_ >= 0, type_
|
||||
|
||||
return check.build_check_set_for_files(
|
||||
f"Types check [{readable_type(type_)}]",
|
||||
old_path,
|
||||
new_path,
|
||||
ext=".mwm",
|
||||
do=lambda path: count_all_types(path)[type_],
|
||||
)
|
||||
|
||||
|
||||
def get_mwm_types_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
|
||||
"""
|
||||
Returns a mwm types check set, that checks a difference in a number of
|
||||
each type between old mwms and new mwms.
|
||||
"""
|
||||
cs = check.CompareCheckSet("Mwm types check")
|
||||
|
||||
def make_do(index):
|
||||
return lambda path: count_all_types(path)[index]
|
||||
|
||||
for t_name, t_index in NAME_TO_INDEX_TYPE_MAPPING.items():
|
||||
cs.add_check(
|
||||
check.build_check_set_for_files(
|
||||
f"Type {t_name} check",
|
||||
old_path,
|
||||
new_path,
|
||||
ext=".mwm",
|
||||
do=make_do(t_index),
|
||||
)
|
||||
)
|
||||
return cs
|
||||
124
tools/python/maps_generator/checks/check_sections.py
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
import os
|
||||
from functools import lru_cache
|
||||
|
||||
from maps_generator.checks import check
|
||||
from mwm import Mwm
|
||||
|
||||
|
||||
class SectionNames:
|
||||
def __init__(self, sections):
|
||||
self.sections = sections
|
||||
|
||||
def __sub__(self, other):
|
||||
return SectionNames(
|
||||
{k: self.sections[k] for k in set(self.sections) - set(other.sections)}
|
||||
)
|
||||
|
||||
def __lt__(self, other):
|
||||
if isinstance(other, int):
|
||||
return len(self.sections) < other
|
||||
elif isinstance(other, SectionNames):
|
||||
return self.sections < other.sections
|
||||
assert False, type(other)
|
||||
|
||||
def __gt__(self, other):
|
||||
if isinstance(other, int):
|
||||
return len(self.sections) > other
|
||||
elif isinstance(other, SectionNames):
|
||||
return self.sections > other.sections
|
||||
assert False, type(other)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.sections)
|
||||
|
||||
def __str__(self):
|
||||
return str(self.sections)
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def read_sections(path: str):
|
||||
return Mwm(path, parse=False).sections_info()
|
||||
|
||||
|
||||
def get_appeared_sections_check_set(
|
||||
old_path: str, new_path: str
|
||||
) -> check.CompareCheckSet:
|
||||
return check.build_check_set_for_files(
|
||||
f"Appeared sections check",
|
||||
old_path,
|
||||
new_path,
|
||||
ext=".mwm",
|
||||
do=lambda path: SectionNames(read_sections(path)),
|
||||
diff_format=lambda s: ", ".join(f"{k}:{v.size}" for k, v in s.sections.items()),
|
||||
format=lambda s: f"number of sections: {len(s.sections)}",
|
||||
)
|
||||
|
||||
|
||||
def get_disappeared_sections_check_set(
|
||||
old_path: str, new_path: str
|
||||
) -> check.CompareCheckSet:
|
||||
return check.build_check_set_for_files(
|
||||
f"Disappeared sections check",
|
||||
old_path,
|
||||
new_path,
|
||||
ext=".mwm",
|
||||
do=lambda path: SectionNames(read_sections(path)),
|
||||
op=lambda previous, current: previous - current,
|
||||
diff_format=lambda s: ", ".join(f"{k}:{v.size}" for k, v in s.sections.items()),
|
||||
format=lambda s: f"number of sections: {len(s.sections)}",
|
||||
)
|
||||
|
||||
|
||||
def get_sections_existence_check_set(
|
||||
old_path: str, new_path: str
|
||||
) -> check.CompareCheckSet:
|
||||
"""
|
||||
Returns a sections existence check set, that checks appeared and
|
||||
disappeared sections between old mwms and new mwms.
|
||||
"""
|
||||
cs = check.CompareCheckSet("Sections existence check")
|
||||
cs.add_check(get_appeared_sections_check_set(old_path, new_path))
|
||||
cs.add_check(get_disappeared_sections_check_set(old_path, new_path))
|
||||
return cs
|
||||
|
||||
|
||||
def _get_sections_set(path):
|
||||
sections = set()
|
||||
for file in os.listdir(path):
|
||||
p = os.path.join(path, file)
|
||||
if os.path.isfile(p) and file.endswith(".mwm"):
|
||||
sections.update(read_sections(p).keys())
|
||||
return sections
|
||||
|
||||
|
||||
def get_sections_size_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
|
||||
"""
|
||||
Returns a sections size check set, that checks a difference in a size
|
||||
of each sections of mwm between old mwms and new mwms.
|
||||
"""
|
||||
sections_set = _get_sections_set(old_path)
|
||||
sections_set.update(_get_sections_set(new_path))
|
||||
|
||||
cs = check.CompareCheckSet("Sections size check")
|
||||
|
||||
def make_do(section):
|
||||
def do(path):
|
||||
sections = read_sections(path)
|
||||
if section not in sections:
|
||||
return None
|
||||
|
||||
return sections[section].size
|
||||
|
||||
return do
|
||||
|
||||
for section in sections_set:
|
||||
cs.add_check(
|
||||
check.build_check_set_for_files(
|
||||
f"Size of {section} check",
|
||||
old_path,
|
||||
new_path,
|
||||
ext=".mwm",
|
||||
do=make_do(section),
|
||||
)
|
||||
)
|
||||
return cs
|
||||
17
tools/python/maps_generator/checks/check_size.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
import os
|
||||
|
||||
from maps_generator.checks import check
|
||||
|
||||
|
||||
def get_size_check_set(old_path: str, new_path: str) -> check.CompareCheckSet:
|
||||
"""
|
||||
Returns a size check set, that checks a difference in a size of mwm between
|
||||
old mwms and new mwms.
|
||||
"""
|
||||
return check.build_check_set_for_files(
|
||||
"Size check",
|
||||
old_path,
|
||||
new_path,
|
||||
ext=".mwm",
|
||||
do=lambda path: os.path.getsize(path),
|
||||
)
|
||||
167
tools/python/maps_generator/checks/default_check_set.py
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
import sys
|
||||
from collections import namedtuple
|
||||
from enum import Enum
|
||||
from typing import Callable
|
||||
from typing import Mapping
|
||||
from typing import Optional
|
||||
from typing import Set
|
||||
from typing import Tuple
|
||||
|
||||
from maps_generator.checks import check
|
||||
from maps_generator.checks.check_addresses import get_addresses_check_set
|
||||
from maps_generator.checks.check_categories import get_categories_check_set
|
||||
from maps_generator.checks.check_log_levels import get_log_levels_check_set
|
||||
from maps_generator.checks.check_mwm_types import get_mwm_type_check_set
|
||||
from maps_generator.checks.check_mwm_types import get_mwm_types_check_set
|
||||
from maps_generator.checks.check_sections import get_sections_existence_check_set
|
||||
from maps_generator.checks.check_sections import get_sections_size_check_set
|
||||
from maps_generator.checks.check_size import get_size_check_set
|
||||
|
||||
|
||||
class CheckType(Enum):
|
||||
low = 1
|
||||
medium = 2
|
||||
hard = 3
|
||||
strict = 4
|
||||
|
||||
|
||||
Threshold = namedtuple("Threshold", ["abs", "rel"])
|
||||
|
||||
_default_thresholds = {
|
||||
CheckType.low: Threshold(abs=20, rel=20),
|
||||
CheckType.medium: Threshold(abs=15, rel=15),
|
||||
CheckType.hard: Threshold(abs=10, rel=10),
|
||||
CheckType.strict: Threshold(abs=0, rel=0),
|
||||
}
|
||||
|
||||
|
||||
def set_thresholds(check_type_map: Mapping[CheckType, Threshold]):
|
||||
global _default_thresholds
|
||||
_default_thresholds = check_type_map
|
||||
|
||||
|
||||
def make_tmap(
|
||||
low: Optional[Tuple[float, float]] = None,
|
||||
medium: Optional[Tuple[float, float]] = None,
|
||||
hard: Optional[Tuple[float, float]] = None,
|
||||
strict: Optional[Tuple[float, float]] = None,
|
||||
):
|
||||
thresholds = _default_thresholds.copy()
|
||||
if low is not None:
|
||||
thresholds[CheckType.low] = Threshold(*low)
|
||||
if medium is not None:
|
||||
thresholds[CheckType.medium] = Threshold(*medium)
|
||||
if hard is not None:
|
||||
thresholds[CheckType.hard] = Threshold(*hard)
|
||||
if strict is not None:
|
||||
thresholds[CheckType.strict] = Threshold(*strict)
|
||||
return thresholds
|
||||
|
||||
|
||||
def make_default_filter(check_type_map: Mapping[CheckType, Threshold] = None):
|
||||
if check_type_map is None:
|
||||
check_type_map = _default_thresholds
|
||||
|
||||
def maker(check_type: CheckType):
|
||||
threshold = check_type_map[check_type]
|
||||
|
||||
def default_filter(r: check.ResLine):
|
||||
return (
|
||||
check.norm(r.diff) > threshold.abs and check.get_rel(r) > threshold.rel
|
||||
)
|
||||
|
||||
return default_filter
|
||||
|
||||
return maker
|
||||
|
||||
|
||||
class MwmsChecks(Enum):
|
||||
sections_existence = 1
|
||||
sections_size = 2
|
||||
mwm_size = 3
|
||||
types = 4
|
||||
booking = 5
|
||||
categories = 6
|
||||
|
||||
|
||||
def get_mwm_check_sets_and_filters(
|
||||
old_path: str, new_path: str, checks: Set[MwmsChecks] = None, **kwargs
|
||||
) -> Mapping[check.Check, Callable]:
|
||||
def need_add(t: MwmsChecks):
|
||||
return checks is None or t in checks
|
||||
|
||||
m = {get_sections_existence_check_set(old_path, new_path): None}
|
||||
|
||||
if need_add(MwmsChecks.sections_size):
|
||||
c = get_sections_size_check_set(old_path, new_path)
|
||||
thresholds = make_tmap(low=(0, 20), medium=(0, 10), hard=(0, 5))
|
||||
m[c] = make_default_filter(thresholds)
|
||||
|
||||
mb = 1 << 20
|
||||
|
||||
if need_add(MwmsChecks.mwm_size):
|
||||
c = get_size_check_set(old_path, new_path)
|
||||
thresholds = make_tmap(low=(2 * mb, 10), medium=(mb, 5), hard=(0.5 * mb, 2))
|
||||
m[c] = make_default_filter(thresholds)
|
||||
|
||||
if need_add(MwmsChecks.types):
|
||||
c = get_mwm_types_check_set(old_path, new_path)
|
||||
thresholds = make_tmap(low=(500, 30), medium=(100, 20), hard=(100, 10))
|
||||
m[c] = make_default_filter(thresholds)
|
||||
|
||||
if need_add(MwmsChecks.booking):
|
||||
c = get_mwm_type_check_set(old_path, new_path, "sponsored-booking")
|
||||
thresholds = make_tmap(low=(500, 20), medium=(50, 10), hard=(50, 5))
|
||||
m[c] = make_default_filter(thresholds)
|
||||
|
||||
if need_add(MwmsChecks.categories):
|
||||
c = get_categories_check_set(old_path, new_path, kwargs["categories_path"])
|
||||
thresholds = make_tmap(low=(200, 20), medium=(50, 10), hard=(50, 5))
|
||||
m[c] = make_default_filter(thresholds)
|
||||
|
||||
return m
|
||||
|
||||
|
||||
class LogsChecks(Enum):
|
||||
log_levels = 1
|
||||
addresses = 2
|
||||
|
||||
|
||||
def get_logs_check_sets_and_filters(
|
||||
old_path: str, new_path: str, checks: Set[LogsChecks] = None
|
||||
) -> Mapping[check.Check, Callable]:
|
||||
def need_add(t: LogsChecks):
|
||||
return checks is None or t in checks
|
||||
|
||||
m = {get_log_levels_check_set(old_path, new_path): None}
|
||||
|
||||
if need_add(LogsChecks.addresses):
|
||||
c = get_addresses_check_set(old_path, new_path)
|
||||
thresholds = make_tmap(low=(50, 20), medium=(20, 10), hard=(10, 5))
|
||||
m[c] = make_default_filter(thresholds)
|
||||
|
||||
return m
|
||||
|
||||
|
||||
def _print_header(file, header, width=100, s="="):
|
||||
stars = s * ((width - len(header)) // 2)
|
||||
rstars = stars
|
||||
if 2 * len(stars) + len(header) < width:
|
||||
rstars += s
|
||||
print(stars, header, rstars, file=file)
|
||||
|
||||
|
||||
def run_checks_and_print_results(
|
||||
checks: Mapping[check.Check, Callable],
|
||||
check_type: CheckType,
|
||||
silent_if_no_results: bool = True,
|
||||
file=sys.stdout,
|
||||
):
|
||||
for check, make_filt in checks.items():
|
||||
check.check()
|
||||
_print_header(file, check.name)
|
||||
check.print(
|
||||
silent_if_no_results=silent_if_no_results,
|
||||
filt=None if make_filt is None else make_filt(check_type),
|
||||
file=file,
|
||||
)
|
||||
0
tools/python/maps_generator/checks/logs/__init__.py
Normal file
241
tools/python/maps_generator/checks/logs/logs_reader.py
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from collections import Counter
|
||||
from collections import namedtuple
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from typing import Tuple
|
||||
from typing import Union
|
||||
|
||||
import maps_generator.generator.env as env
|
||||
from maps_generator.generator.stages import get_stage_type
|
||||
from maps_generator.utils.algo import parse_timedelta
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
FLAGS = re.MULTILINE | re.DOTALL
|
||||
|
||||
GEN_LINE_PATTERN = re.compile(
|
||||
r"^LOG\s+TID\((?P<tid>\d+)\)\s+(?P<level>[A-Z]+)\s+"
|
||||
r"(?P<timestamp>[-.e0-9]+)\s+(?P<message>.+)$",
|
||||
FLAGS,
|
||||
)
|
||||
GEN_LINE_CHECK_PATTERN = re.compile(
|
||||
r"^TID\((?P<tid>\d+)\)\s+" r"ASSERT FAILED\s+(?P<message>.+)$", FLAGS
|
||||
)
|
||||
|
||||
MAPS_GEN_LINE_PATTERN = re.compile(
|
||||
r"^\[(?P<time_string>[0-9-:, ]+)\]\s+(?P<level>\w+)\s+"
|
||||
r"(?P<module>\w+)\s+(?P<message>.+)$",
|
||||
FLAGS,
|
||||
)
|
||||
|
||||
STAGE_START_MSG_PATTERN = re.compile(r"^Stage (?P<name>\w+): start ...$")
|
||||
STAGE_FINISH_MSG_PATTERN = re.compile(
|
||||
r"^Stage (?P<name>\w+): finished in (?P<duration_string>.+)$"
|
||||
)
|
||||
|
||||
LogLine = namedtuple("LogLine", ["timestamp", "level", "tid", "message", "type"])
|
||||
LogStage = namedtuple("LogStage", ["name", "duration", "lines"])
|
||||
|
||||
|
||||
class LogType(Enum):
|
||||
gen = 1
|
||||
maps_gen = 2
|
||||
|
||||
|
||||
class Log:
|
||||
def __init__(self, path: str):
|
||||
self.path = Path(path)
|
||||
self.name = self.path.stem
|
||||
|
||||
self.is_stage_log = False
|
||||
self.is_mwm_log = False
|
||||
try:
|
||||
get_stage_type(self.name)
|
||||
self.is_stage_log = True
|
||||
except AttributeError:
|
||||
if self.name in env.COUNTRIES_NAMES or self.name in env.WORLDS_NAMES:
|
||||
self.is_mwm_log = True
|
||||
|
||||
self.lines = self._parse_lines()
|
||||
|
||||
def _parse_lines(self) -> List[LogLine]:
|
||||
logline = ""
|
||||
state = None
|
||||
lines = []
|
||||
base_timestamp = 0.0
|
||||
|
||||
def try_parse_and_insert():
|
||||
nonlocal logline
|
||||
logline = logline.strip()
|
||||
if not logline:
|
||||
return
|
||||
|
||||
nonlocal base_timestamp
|
||||
line = None
|
||||
if state == LogType.gen:
|
||||
line = Log._parse_gen_line(logline, base_timestamp)
|
||||
elif state == LogType.maps_gen:
|
||||
line = Log._parse_maps_gen_line(logline)
|
||||
base_timestamp = line.timestamp
|
||||
|
||||
if line is not None:
|
||||
lines.append(line)
|
||||
else:
|
||||
logger.warn(f"{self.name}: line was not parsed: {logline}")
|
||||
logline = ""
|
||||
|
||||
with self.path.open() as logfile:
|
||||
for line in logfile:
|
||||
if line.startswith("LOG") or line.startswith("TID"):
|
||||
try_parse_and_insert()
|
||||
state = LogType.gen
|
||||
elif line.startswith("["):
|
||||
try_parse_and_insert()
|
||||
state = LogType.maps_gen
|
||||
logline += line
|
||||
try_parse_and_insert()
|
||||
|
||||
return lines
|
||||
|
||||
@staticmethod
|
||||
def _parse_gen_line(line: str, base_time: float = 0.0) -> LogLine:
|
||||
m = GEN_LINE_PATTERN.match(line)
|
||||
if m:
|
||||
return LogLine(
|
||||
timestamp=base_time + float(m["timestamp"]),
|
||||
level=logging.getLevelName(m["level"]),
|
||||
tid=int(m["tid"]),
|
||||
message=m["message"],
|
||||
type=LogType.gen,
|
||||
)
|
||||
|
||||
m = GEN_LINE_CHECK_PATTERN.match(line)
|
||||
if m:
|
||||
return LogLine(
|
||||
timestamp=None,
|
||||
level=logging.getLevelName("CRITICAL"),
|
||||
tid=None,
|
||||
message=m["message"],
|
||||
type=LogType.gen,
|
||||
)
|
||||
|
||||
assert False, line
|
||||
|
||||
@staticmethod
|
||||
def _parse_maps_gen_line(line: str) -> LogLine:
|
||||
        m = MAPS_GEN_LINE_PATTERN.match(line)
        if m:
            # Parse the timestamp only for lines that actually matched;
            # otherwise indexing the failed match would raise before the
            # final assert gets a chance to report the unparsed line.
            time_string = m["time_string"].split(",")[0]
            timestamp = datetime.datetime.strptime(
                time_string, logging.Formatter.default_time_format
            ).timestamp()
|
||||
return LogLine(
|
||||
timestamp=float(timestamp),
|
||||
level=logging.getLevelName(m["level"]),
|
||||
tid=None,
|
||||
message=m["message"],
|
||||
type=LogType.maps_gen,
|
||||
)
|
||||
|
||||
assert False, line
|
||||
|
||||
|
||||
class LogsReader:
|
||||
def __init__(self, path: str):
|
||||
self.path = os.path.abspath(os.path.expanduser(path))
|
||||
|
||||
def __iter__(self):
|
||||
for filename in os.listdir(self.path):
|
||||
if filename.endswith(".log"):
|
||||
yield Log(os.path.join(self.path, filename))
|
||||
|
||||
|
||||
def split_into_stages(log: Log) -> List[LogStage]:
|
||||
log_stages = []
|
||||
name = None
|
||||
lines = []
|
||||
for line in log.lines:
|
||||
if line.message.startswith("Stage"):
|
||||
m = STAGE_START_MSG_PATTERN.match(line.message)
|
||||
if m:
|
||||
if name is not None:
|
||||
logger.warn(f"{log.name}: stage {name} has not finish line.")
|
||||
log_stages.append(LogStage(name=name, duration=None, lines=lines))
|
||||
name = m["name"]
|
||||
|
||||
m = STAGE_FINISH_MSG_PATTERN.match(line.message)
|
||||
if m:
|
||||
# assert name == m["name"], line
|
||||
duration = parse_timedelta(m["duration_string"])
|
||||
log_stages.append(LogStage(name=name, duration=duration, lines=lines))
|
||||
name = None
|
||||
lines = []
|
||||
else:
|
||||
lines.append(line)
|
||||
|
||||
if name is not None:
|
||||
logger.warn(f"{log.name}: stage {name} has not finish line.")
|
||||
log_stages.append(LogStage(name=name, duration=None, lines=lines))
|
||||
|
||||
return log_stages
|
||||
|
||||
|
||||
def _is_worse(lhs: LogStage, rhs: LogStage) -> bool:
|
||||
if (lhs.duration is None) ^ (rhs.duration is None):
|
||||
return lhs.duration is None
|
||||
|
||||
if len(rhs.lines) > len(lhs.lines):
|
||||
return True
|
||||
|
||||
return rhs.duration > lhs.duration
|
||||
|
||||
|
||||
def normalize_logs(llogs: List[LogStage]) -> List[LogStage]:
|
||||
normalized_logs = []
|
||||
buckets = {}
|
||||
for log in llogs:
|
||||
if log.name in buckets:
|
||||
if _is_worse(normalized_logs[buckets[log.name]], log):
|
||||
normalized_logs[buckets[log.name]] = log
|
||||
else:
|
||||
normalized_logs.append(log)
|
||||
buckets[log.name] = len(normalized_logs) - 1
|
||||
|
||||
return normalized_logs
|
||||
|
||||
|
||||
def count_levels(logs: Union[List[LogLine], LogStage]) -> Counter:
|
||||
if isinstance(logs, list):
|
||||
return Counter(log.level for log in logs)
|
||||
|
||||
if isinstance(logs, LogStage):
|
||||
return count_levels(logs.lines)
|
||||
|
||||
assert False, f"Type {type(logs)} is unsupported."
|
||||
|
||||
|
||||
def find_and_parse(
|
||||
logs: Union[List[LogLine], LogStage], pattern: Union[str, type(re.compile(""))],
|
||||
) -> List[Tuple[dict, str]]:
|
||||
if isinstance(pattern, str):
|
||||
pattern = re.compile(pattern, FLAGS)
|
||||
|
||||
if isinstance(logs, list):
|
||||
found = []
|
||||
for log in logs:
|
||||
m = pattern.match(log.message)
|
||||
if m:
|
||||
found.append((m.groupdict(), log))
|
||||
return found
|
||||
|
||||
if isinstance(logs, LogStage):
|
||||
return find_and_parse(logs.lines, pattern)
|
||||
|
||||
assert False, f"Type {type(logs)} is unsupported."
|
||||
37
tools/python/maps_generator/diff_stats.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import argparse
|
||||
|
||||
from maps_generator.generator.statistics import diff
|
||||
from maps_generator.generator.statistics import read_types
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="This script prints the difference between old_stats.json and new_stats.json."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--old",
|
||||
default="",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path to old file with map generation statistics.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--new",
|
||||
default="",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path to new file with map generation statistics.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
args = get_args()
|
||||
old = read_types(args.old)
|
||||
new = read_types(args.new)
|
||||
for line in diff(new, old):
|
||||
print(";".join(str(x) for x in line))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
0
tools/python/maps_generator/generator/__init__.py
Normal file
68
tools/python/maps_generator/generator/coastline.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
"""
|
||||
This file contains api for osmfilter and generator_tool to generate coastline.
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from maps_generator.generator import settings
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.generator.gen_tool import run_gen_tool
|
||||
from maps_generator.generator.osmtools import osmfilter
|
||||
|
||||
|
||||
def filter_coastline(
|
||||
name_executable,
|
||||
in_file,
|
||||
out_file,
|
||||
output=subprocess.DEVNULL,
|
||||
error=subprocess.DEVNULL,
|
||||
):
|
||||
osmfilter(
|
||||
name_executable,
|
||||
in_file,
|
||||
out_file,
|
||||
output=output,
|
||||
error=error,
|
||||
keep="",
|
||||
keep_ways="natural=coastline",
|
||||
keep_nodes="capital=yes place=town =city",
|
||||
)
|
||||
|
||||
|
||||
def make_coastline(env: Env):
|
||||
coastline_o5m = os.path.join(env.paths.coastline_path, "coastline.o5m")
|
||||
filter_coastline(
|
||||
env[settings.OSM_TOOL_FILTER],
|
||||
env.paths.planet_o5m,
|
||||
coastline_o5m,
|
||||
output=env.get_subprocess_out(),
|
||||
error=env.get_subprocess_out(),
|
||||
)
|
||||
|
||||
run_gen_tool(
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(),
|
||||
err=env.get_subprocess_out(),
|
||||
data_path=env.paths.data_path,
|
||||
intermediate_data_path=env.paths.coastline_path,
|
||||
osm_file_type="o5m",
|
||||
osm_file_name=coastline_o5m,
|
||||
node_storage=env.node_storage,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
preprocess=True,
|
||||
)
|
||||
|
||||
run_gen_tool(
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(),
|
||||
err=env.get_subprocess_out(),
|
||||
data_path=env.paths.data_path,
|
||||
intermediate_data_path=env.paths.coastline_path,
|
||||
osm_file_type="o5m",
|
||||
osm_file_name=coastline_o5m,
|
||||
node_storage=env.node_storage,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
make_coasts=True,
|
||||
fail_on_coasts=True,
|
||||
threads_count=settings.THREADS_COUNT,
|
||||
)
|
||||
100
tools/python/maps_generator/generator/diffs.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
from pathlib import Path
|
||||
|
||||
import subprocess
|
||||
import warnings
|
||||
|
||||
class Status:
|
||||
NO_NEW_VERSION = "Failed: new version doesn't exist: {new}"
|
||||
INTERNAL_ERROR = "Failed: internal error (C++ module) while calculating"
|
||||
|
||||
NO_OLD_VERSION = "Skipped: old version doesn't exist: {old}"
|
||||
NOTHING_TO_DO = "Skipped: output already exists: {out}"
|
||||
OK = "Succeeded: calculated {out}: {diff_size} out of {new_size} bytes"
|
||||
TOO_LARGE = "Cancelled: {out}: diff {diff_size} > new version {new_size}"
|
||||
|
||||
@classmethod
|
||||
def is_error(cls, status):
|
||||
return status == cls.NO_NEW_VERSION or status == cls.INTERNAL_ERROR
|
||||
|
||||
|
||||
def calculate_diff(params):
|
||||
diff_tool, new, old, out = params["tool"], params["new"], params["old"], params["out"]
|
||||
|
||||
if not new.exists():
|
||||
return Status.NO_NEW_VERSION, params
|
||||
|
||||
if not old.exists():
|
||||
return Status.NO_OLD_VERSION, params
|
||||
|
||||
status = Status.OK
|
||||
if out.exists():
|
||||
status = Status.NOTHING_TO_DO
|
||||
else:
|
||||
res = subprocess.run([diff_tool.as_posix(), "make", old, new, out])
|
||||
if res.returncode != 0:
|
||||
return Status.INTERNAL_ERROR, params
|
||||
|
||||
diff_size = out.stat().st_size
|
||||
new_size = new.stat().st_size
|
||||
|
||||
if diff_size > new_size:
|
||||
status = Status.TOO_LARGE
|
||||
|
||||
params.update({
|
||||
"diff_size": diff_size,
|
||||
"new_size": new_size
|
||||
})
|
||||
|
||||
return status, params
|
||||
|
||||
|
||||
def mwm_diff_calculation(data_dir, logger, depth):
|
||||
data = list(data_dir.get_mwms())[:depth]
|
||||
results = map(calculate_diff, data)
|
||||
for status, params in results:
|
||||
if Status.is_error(status):
|
||||
raise Exception(status.format(**params))
|
||||
logger.info(status.format(**params))
|
||||
|
||||
|
||||
class DataDir(object):
|
||||
def __init__(self, diff_tool, mwm_name, new_version_dir, old_version_root_dir):
|
||||
self.diff_tool_path = Path(diff_tool)
|
||||
self.mwm_name = mwm_name
|
||||
self.diff_name = self.mwm_name + ".mwmdiff"
|
||||
|
||||
self.new_version_dir = Path(new_version_dir)
|
||||
self.new_version_path = Path(new_version_dir, mwm_name)
|
||||
self.old_version_root_dir = Path(old_version_root_dir)
|
||||
|
||||
def get_mwms(self):
|
||||
old_versions = sorted(
|
||||
self.old_version_root_dir.glob("[0-9]*"),
|
||||
reverse=True
|
||||
)
|
||||
for old_version_dir in old_versions:
|
||||
if (old_version_dir != self.new_version_dir and
|
||||
old_version_dir.is_dir()):
|
||||
diff_dir = Path(self.new_version_dir, old_version_dir.name)
|
||||
diff_dir.mkdir(exist_ok=True)
|
||||
yield {
|
||||
"tool": self.diff_tool_path,
|
||||
"new": self.new_version_path,
|
||||
"old": Path(old_version_dir, self.mwm_name),
|
||||
"out": Path(diff_dir, self.diff_name)
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import logging
|
||||
import sys
|
||||
|
||||
logger = logging.getLogger()
|
||||
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
data_dir = DataDir(
|
||||
mwm_name=sys.argv[1], new_version_dir=sys.argv[2],
|
||||
old_version_root_dir=sys.argv[3],
|
||||
)
|
||||
mwm_diff_calculation(data_dir, logger, depth=1)
|
||||
582
tools/python/maps_generator/generator/env.py
Normal file
|
|
@ -0,0 +1,582 @@
|
|||
import collections
|
||||
import datetime
|
||||
import logging
|
||||
import logging.config
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from functools import wraps
|
||||
from typing import Any
|
||||
from typing import AnyStr
|
||||
from typing import Callable
|
||||
from typing import Dict
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
from typing import Set
|
||||
from typing import Type
|
||||
from typing import Union
|
||||
|
||||
from maps_generator.generator import settings
|
||||
from maps_generator.generator import status
|
||||
from maps_generator.generator.osmtools import build_osmtools
|
||||
from maps_generator.generator.stages import Stage
|
||||
from maps_generator.utils.file import find_executable
|
||||
from maps_generator.utils.file import is_executable
|
||||
from maps_generator.utils.file import make_symlink
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
||||
WORLD_NAME = "World"
|
||||
WORLD_COASTS_NAME = "WorldCoasts"
|
||||
|
||||
WORLDS_NAMES = {WORLD_NAME, WORLD_COASTS_NAME}
|
||||
|
||||
|
||||
def get_all_countries_list(borders_path: AnyStr) -> List[AnyStr]:
|
||||
"""Returns all countries including World and WorldCoasts."""
|
||||
return [
|
||||
f.replace(".poly", "")
|
||||
for f in os.listdir(borders_path)
|
||||
if os.path.isfile(os.path.join(borders_path, f))
|
||||
] + list(WORLDS_NAMES)
|
||||
|
||||
|
||||
def create_if_not_exist_path(path: AnyStr) -> bool:
|
||||
"""Creates directory if it doesn't exist."""
|
||||
try:
|
||||
os.makedirs(path)
|
||||
logger.info(f"Create {path} ...")
|
||||
return True
|
||||
except FileExistsError:
|
||||
return False
|
||||
|
||||
|
||||
def create_if_not_exist(func: Callable[..., AnyStr]) -> Callable[..., AnyStr]:
|
||||
"""
|
||||
    A decorator that calls create_if_not_exist_path on the path
|
||||
    returned by func and then returns that path.
|
||||
"""
|
||||
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
path = func(*args, **kwargs)
|
||||
create_if_not_exist_path(path)
|
||||
return path
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
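A minimal sketch of how the two helpers above combine; ExamplePaths is a made-up class for illustration only, the real usage is the @property/@create_if_not_exist pairs in PathProvider below:

class ExamplePaths:
    def __init__(self, root: AnyStr):
        self.root = root

    @property
    @create_if_not_exist
    def tmp_path(self) -> AnyStr:
        # The directory is created on first access if it does not exist yet.
        return os.path.join(self.root, "tmp")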
class Version:
|
||||
"""It's used for writing and reading a generation version."""
|
||||
|
||||
@staticmethod
|
||||
def write(out_path: AnyStr, version: AnyStr):
|
||||
with open(os.path.join(out_path, settings.VERSION_FILE_NAME), "w") as f:
|
||||
f.write(str(version))
|
||||
|
||||
@staticmethod
|
||||
def read(version_path: AnyStr) -> int:
|
||||
with open(version_path) as f:
|
||||
line = f.readline().strip()
|
||||
try:
|
||||
return int(line)
|
||||
except ValueError:
|
||||
logger.exception(f"Cast '{line}' to int error.")
|
||||
return 0
|
||||
|
||||
|
||||
def find_last_build_dir(hint: Optional[AnyStr] = None) -> Optional[AnyStr]:
|
||||
"""
|
||||
    It tries to find the latest generation directory. If one is found, the function
|
||||
    returns its name. Otherwise it returns None.
|
||||
"""
|
||||
if hint is not None:
|
||||
p = os.path.join(settings.MAIN_OUT_PATH, hint)
|
||||
return hint if os.path.exists(p) else None
|
||||
try:
|
||||
paths = [
|
||||
os.path.join(settings.MAIN_OUT_PATH, f)
|
||||
for f in os.listdir(settings.MAIN_OUT_PATH)
|
||||
]
|
||||
except FileNotFoundError:
|
||||
logger.exception(f"{settings.MAIN_OUT_PATH} not found.")
|
||||
return None
|
||||
versions = []
|
||||
for path in paths:
|
||||
version_path = os.path.join(path, settings.VERSION_FILE_NAME)
|
||||
if not os.path.isfile(version_path):
|
||||
versions.append(0)
|
||||
else:
|
||||
versions.append(Version.read(version_path))
|
||||
pairs = sorted(zip(paths, versions), key=lambda p: p[1], reverse=True)
|
||||
return None if not pairs or pairs[0][1] == 0 else pairs[0][0].split(os.sep)[-1]
|
||||
|
||||
|
||||
class PathProvider:
|
||||
"""
|
||||
    PathProvider builds the paths that are used during maps generation.
|
||||
"""
|
||||
|
||||
    def __init__(self, build_path: AnyStr, build_name: AnyStr, mwm_version: AnyStr):
|
||||
self.build_path = build_path
|
||||
self.build_name = build_name
|
||||
self.mwm_version = mwm_version
|
||||
|
||||
create_if_not_exist_path(self.build_path)
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def intermediate_data_path(self) -> AnyStr:
|
||||
"""
|
||||
        intermediate_data_path contains intermediate files that are needed for
|
||||
        generation, for example downloaded external files,
|
||||
*.mwm.tmp files, etc.
|
||||
"""
|
||||
return os.path.join(self.build_path, "intermediate_data")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def cache_path(self) -> AnyStr:
|
||||
"""cache_path contains caches for nodes, ways, relations."""
|
||||
if not settings.CACHE_PATH:
|
||||
return self.intermediate_data_path
|
||||
|
||||
return os.path.join(settings.CACHE_PATH, self.build_name)
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def data_path(self) -> AnyStr:
|
||||
"""It's a synonym for intermediate_data_path."""
|
||||
return self.intermediate_data_path
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def intermediate_tmp_path(self) -> AnyStr:
|
||||
"""intermediate_tmp_path contains *.mwm.tmp files."""
|
||||
return os.path.join(self.intermediate_data_path, "tmp")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def mwm_path(self) -> AnyStr:
|
||||
"""mwm_path contains *.mwm files."""
|
||||
return os.path.join(self.build_path, self.mwm_version)
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def log_path(self) -> AnyStr:
|
||||
"""mwm_path log files."""
|
||||
return os.path.join(self.build_path, "logs")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def generation_borders_path(self) -> AnyStr:
|
||||
"""
|
||||
        generation_borders_path contains *.poly files that define
|
||||
which .mwm files are generated.
|
||||
"""
|
||||
return os.path.join(self.intermediate_data_path, "borders")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def draft_path(self) -> AnyStr:
|
||||
"""draft_path is used for saving temporary intermediate files."""
|
||||
return os.path.join(self.build_path, "draft")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def osm2ft_path(self) -> AnyStr:
|
||||
"""osm2ft_path contains osmId<->ftId mappings."""
|
||||
return os.path.join(self.build_path, "osm2ft")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def coastline_path(self) -> AnyStr:
|
||||
"""coastline_path is used for a coastline generation."""
|
||||
return os.path.join(self.intermediate_data_path, "coasts")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def coastline_tmp_path(self) -> AnyStr:
|
||||
"""coastline_tmp_path is used for a coastline generation."""
|
||||
return os.path.join(self.coastline_path, "tmp")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def status_path(self) -> AnyStr:
|
||||
"""status_path contains status files."""
|
||||
return os.path.join(self.build_path, "status")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def descriptions_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "descriptions")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def stats_path(self) -> AnyStr:
|
||||
return os.path.join(self.build_path, "stats")
|
||||
|
||||
@property
|
||||
@create_if_not_exist
|
||||
def transit_path(self) -> AnyStr:
|
||||
return self.intermediate_data_path
|
||||
|
||||
@property
|
||||
def transit_path_experimental(self) -> AnyStr:
|
||||
return (
|
||||
os.path.join(self.intermediate_data_path, "transit_from_gtfs")
|
||||
if settings.TRANSIT_URL
|
||||
else ""
|
||||
)
|
||||
|
||||
@property
|
||||
def world_roads_path(self) -> AnyStr:
|
||||
return (
|
||||
os.path.join(self.intermediate_data_path, "world_roads.txt")
|
||||
if settings.NEED_BUILD_WORLD_ROADS
|
||||
else ""
|
||||
)
|
||||
|
||||
@property
|
||||
def planet_osm_pbf(self) -> AnyStr:
|
||||
return os.path.join(self.build_path, f"{settings.PLANET}.osm.pbf")
|
||||
|
||||
@property
|
||||
def planet_o5m(self) -> AnyStr:
|
||||
return os.path.join(self.build_path, f"{settings.PLANET}.o5m")
|
||||
|
||||
@property
|
||||
def world_roads_o5m(self) -> AnyStr:
|
||||
return os.path.join(self.build_path, "world_roads.o5m")
|
||||
|
||||
@property
|
||||
def main_status_path(self) -> AnyStr:
|
||||
return os.path.join(self.status_path, status.with_stat_ext("stages"))
|
||||
|
||||
@property
|
||||
def packed_polygons_path(self) -> AnyStr:
|
||||
return os.path.join(self.mwm_path, "packed_polygons.bin")
|
||||
|
||||
@property
|
||||
def localads_path(self) -> AnyStr:
|
||||
return os.path.join(self.build_path, f"localads_{self.mwm_version}")
|
||||
|
||||
@property
|
||||
def types_path(self) -> AnyStr:
|
||||
return os.path.join(self.user_resource_path, "types.txt")
|
||||
|
||||
@property
|
||||
def external_resources_path(self) -> AnyStr:
|
||||
return os.path.join(self.mwm_path, "external_resources.txt")
|
||||
|
||||
@property
|
||||
def id_to_wikidata_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "id_to_wikidata.csv")
|
||||
|
||||
@property
|
||||
def wiki_url_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "wiki_urls.txt")
|
||||
|
||||
@property
|
||||
def ugc_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "ugc_db.sqlite3")
|
||||
|
||||
@property
|
||||
def hotels_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "hotels.csv")
|
||||
|
||||
@property
|
||||
def promo_catalog_cities_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "promo_catalog_cities.json")
|
||||
|
||||
@property
|
||||
def promo_catalog_countries_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "promo_catalog_countries.json")
|
||||
|
||||
@property
|
||||
def popularity_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "popular_places.csv")
|
||||
|
||||
@property
|
||||
def subway_path(self) -> AnyStr:
|
||||
return os.path.join(
|
||||
self.intermediate_data_path, "mapsme_osm_subways.transit.json"
|
||||
)
|
||||
|
||||
@property
|
||||
def food_paths(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "ids_food.json")
|
||||
|
||||
@property
|
||||
def food_translations_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "translations_food.json")
|
||||
|
||||
@property
|
||||
def cities_boundaries_path(self) -> AnyStr:
|
||||
return os.path.join(self.intermediate_data_path, "cities_boundaries.bin")
|
||||
|
||||
@property
|
||||
def hierarchy_path(self) -> AnyStr:
|
||||
return os.path.join(self.user_resource_path, "hierarchy.txt")
|
||||
|
||||
@property
|
||||
def old_to_new_path(self) -> AnyStr:
|
||||
return os.path.join(self.user_resource_path, "old_vs_new.csv")
|
||||
|
||||
@property
|
||||
def borders_to_osm_path(self) -> AnyStr:
|
||||
return os.path.join(self.user_resource_path, "borders_vs_osm.csv")
|
||||
|
||||
@property
|
||||
def countries_synonyms_path(self) -> AnyStr:
|
||||
return os.path.join(self.user_resource_path, "countries_synonyms.csv")
|
||||
|
||||
@property
|
||||
def counties_txt_path(self) -> AnyStr:
|
||||
return os.path.join(self.mwm_path, "countries.txt")
|
||||
|
||||
@property
|
||||
def user_resource_path(self) -> AnyStr:
|
||||
return settings.USER_RESOURCE_PATH
|
||||
|
||||
@staticmethod
|
||||
def srtm_path() -> AnyStr:
|
||||
return settings.SRTM_PATH
|
||||
|
||||
@staticmethod
|
||||
def isolines_path() -> AnyStr:
|
||||
return settings.ISOLINES_PATH
|
||||
|
||||
@staticmethod
|
||||
def addresses_path() -> AnyStr:
|
||||
return settings.ADDRESSES_PATH
|
||||
|
||||
@staticmethod
|
||||
def borders_path() -> AnyStr:
|
||||
return os.path.join(settings.USER_RESOURCE_PATH, "borders")
|
||||
|
||||
@staticmethod
|
||||
@create_if_not_exist
|
||||
def tmp_dir():
|
||||
return settings.TMPDIR
|
||||
|
||||
|
||||
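A short sketch of the layout produced by the class above, with hypothetical constructor arguments; the comments show the joined paths:

paths = PathProvider("/data/generation/2020_10_01__00_00_00",
                     "2020_10_01__00_00_00", "201001")
paths.mwm_path                  # /data/generation/2020_10_01__00_00_00/201001
paths.intermediate_tmp_path     # .../intermediate_data/tmp
paths.generation_borders_path   # .../intermediate_data/borders
paths.coastline_tmp_path        # .../intermediate_data/coasts/tmp
paths.log_path                  # .../logs
paths.status_path               # .../status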
COUNTRIES_NAMES = set(get_all_countries_list(PathProvider.borders_path()))
|
||||
|
||||
|
||||
class Env:
|
||||
"""
|
||||
    Env provides a generation environment. It sets up the tools and paths
|
||||
    that are used for maps generation and stores the state of the generation.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
countries: Optional[List[AnyStr]] = None,
|
||||
production: bool = False,
|
||||
build_name: Optional[AnyStr] = None,
|
||||
build_suffix: AnyStr = "",
|
||||
skipped_stages: Optional[Set[Type[Stage]]] = None,
|
||||
force_download_files: bool = False,
|
||||
):
|
||||
self.setup_logging()
|
||||
|
||||
logger.info("Start setup ...")
|
||||
os.environ["TMPDIR"] = PathProvider.tmp_dir()
|
||||
for k, v in self.setup_osm_tools().items():
|
||||
setattr(self, k, v)
|
||||
|
||||
self.production = production
|
||||
self.force_download_files = force_download_files
|
||||
self.countries = countries
|
||||
self.skipped_stages = set() if skipped_stages is None else skipped_stages
|
||||
if self.countries is None:
|
||||
self.countries = get_all_countries_list(PathProvider.borders_path())
|
||||
|
||||
self.node_storage = settings.NODE_STORAGE
|
||||
|
||||
version_format = "%Y_%m_%d__%H_%M_%S"
|
||||
suffix_div = "-"
|
||||
self.dt = None
|
||||
if build_name is None:
|
||||
self.dt = datetime.datetime.now()
|
||||
build_name = self.dt.strftime(version_format)
|
||||
if build_suffix:
|
||||
build_name = f"{build_name}{suffix_div}{build_suffix}"
|
||||
else:
|
||||
s = build_name.split(suffix_div, maxsplit=1)
|
||||
if len(s) == 1:
|
||||
s.append("")
|
||||
|
||||
date_str, build_suffix = s
|
||||
self.dt = datetime.datetime.strptime(date_str, version_format)
|
||||
|
||||
self.build_suffix = build_suffix
|
||||
self.mwm_version = self.dt.strftime("%y%m%d")
|
||||
self.planet_version = self.dt.strftime("%s")
|
||||
self.build_path = os.path.join(settings.MAIN_OUT_PATH, build_name)
|
||||
self.build_name = build_name
|
||||
|
||||
self.gen_tool = self.setup_generator_tool()
|
||||
if WORLD_NAME in self.countries:
|
||||
self.world_roads_builder_tool = self.setup_world_roads_builder_tool()
|
||||
self.diff_tool = self.setup_mwm_diff_tool()
|
||||
|
||||
logger.info(f"Build name is {self.build_name}.")
|
||||
logger.info(f"Build path is {self.build_path}.")
|
||||
|
||||
self.paths = PathProvider(self.build_path, self.build_name, self.mwm_version)
|
||||
|
||||
Version.write(self.build_path, self.planet_version)
|
||||
self.setup_borders()
|
||||
self.setup_osm2ft()
|
||||
|
||||
if self.force_download_files:
|
||||
for item in os.listdir(self.paths.status_path):
|
||||
if item.endswith(".download"):
|
||||
os.remove(os.path.join(self.paths.status_path, item))
|
||||
|
||||
self.main_status = status.Status()
|
||||
# self.countries_meta stores log files and statuses for each country.
|
||||
self.countries_meta = collections.defaultdict(dict)
|
||||
self.subprocess_out = None
|
||||
self.subprocess_countries_out = {}
|
||||
|
||||
printed_countries = ", ".join(self.countries)
|
||||
if len(self.countries) > 50:
|
||||
printed_countries = (
|
||||
f"{', '.join(self.countries[:25])}, ..., "
|
||||
f"{', '.join(self.countries[-25:])}"
|
||||
)
|
||||
logger.info(
|
||||
f"The following {len(self.countries)} maps will build: "
|
||||
f"{printed_countries}."
|
||||
)
|
||||
logger.info("Finish setup")
|
||||
|
||||
def __getitem__(self, item):
|
||||
return self.__dict__[item]
|
||||
|
||||
def get_tmp_mwm_names(self) -> List[AnyStr]:
|
||||
tmp_ext = ".mwm.tmp"
|
||||
existing_names = set()
|
||||
for f in os.listdir(self.paths.intermediate_tmp_path):
|
||||
path = os.path.join(self.paths.intermediate_tmp_path, f)
|
||||
if f.endswith(tmp_ext) and os.path.isfile(path):
|
||||
name = f.replace(tmp_ext, "")
|
||||
if name in self.countries:
|
||||
existing_names.add(name)
|
||||
return [c for c in self.countries if c in existing_names]
|
||||
|
||||
def add_skipped_stage(self, stage: Union[Type[Stage], Stage]):
|
||||
if isinstance(stage, Stage):
|
||||
stage = stage.__class__
|
||||
self.skipped_stages.add(stage)
|
||||
|
||||
def is_accepted_stage(self, stage: Union[Type[Stage], Stage]) -> bool:
|
||||
if isinstance(stage, Stage):
|
||||
stage = stage.__class__
|
||||
return stage not in self.skipped_stages
|
||||
|
||||
def finish(self):
|
||||
self.main_status.finish()
|
||||
|
||||
def finish_mwm(self, mwm_name: AnyStr):
|
||||
self.countries_meta[mwm_name]["status"].finish()
|
||||
|
||||
def set_subprocess_out(self, subprocess_out: Any, country: Optional[AnyStr] = None):
|
||||
if country is None:
|
||||
self.subprocess_out = subprocess_out
|
||||
else:
|
||||
self.subprocess_countries_out[country] = subprocess_out
|
||||
|
||||
def get_subprocess_out(self, country: Optional[AnyStr] = None):
|
||||
if country is None:
|
||||
return self.subprocess_out
|
||||
else:
|
||||
return self.subprocess_countries_out[country]
|
||||
|
||||
@staticmethod
|
||||
def setup_logging():
|
||||
def exception_handler(type, value, tb):
|
||||
logger.exception(
|
||||
f"Uncaught exception: {str(value)}", exc_info=(type, value, tb)
|
||||
)
|
||||
|
||||
logging.config.dictConfig(settings.LOGGING)
|
||||
sys.excepthook = exception_handler
|
||||
|
||||
@staticmethod
|
||||
def setup_generator_tool() -> AnyStr:
|
||||
logger.info("Check generator tool ...")
|
||||
exceptions = []
|
||||
for gen_tool in settings.POSSIBLE_GEN_TOOL_NAMES:
|
||||
gen_tool_path = shutil.which(gen_tool)
|
||||
if gen_tool_path is None:
|
||||
logger.info(f"Looking for generator tool in {settings.BUILD_PATH} ...")
|
||||
try:
|
||||
gen_tool_path = find_executable(settings.BUILD_PATH, gen_tool)
|
||||
except FileNotFoundError as e:
|
||||
exceptions.append(e)
|
||||
continue
|
||||
|
||||
logger.info(f"Generator tool found - {gen_tool_path}")
|
||||
return gen_tool_path
|
||||
|
||||
raise Exception(exceptions)
|
||||
|
||||
@staticmethod
|
||||
def setup_world_roads_builder_tool() -> AnyStr:
|
||||
logger.info(f"Check world_roads_builder_tool. Looking for it in {settings.BUILD_PATH} ...")
|
||||
world_roads_builder_tool_path = find_executable(settings.BUILD_PATH, "world_roads_builder_tool")
|
||||
logger.info(f"world_roads_builder_tool found - {world_roads_builder_tool_path}")
|
||||
return world_roads_builder_tool_path
|
||||
|
||||
@staticmethod
|
||||
def setup_mwm_diff_tool() -> AnyStr:
|
||||
logger.info(f"Check mwm_diff_tool. Looking for it in {settings.BUILD_PATH} ...")
|
||||
mwm_diff_tool_path = find_executable(settings.BUILD_PATH, "mwm_diff_tool")
|
||||
logger.info(f"mwm_diff_tool found - {mwm_diff_tool_path}")
|
||||
return mwm_diff_tool_path
|
||||
|
||||
@staticmethod
|
||||
def setup_osm_tools() -> Dict[AnyStr, AnyStr]:
|
||||
path = settings.OSM_TOOLS_PATH
|
||||
osm_tool_names = [
|
||||
settings.OSM_TOOL_CONVERT,
|
||||
settings.OSM_TOOL_UPDATE,
|
||||
settings.OSM_TOOL_FILTER,
|
||||
]
|
||||
|
||||
logger.info("Check for the osmctools binaries...")
|
||||
|
||||
# Check in the configured path first.
|
||||
tmp_paths = [os.path.join(path, t) for t in osm_tool_names]
|
||||
if not all([is_executable(t) for t in tmp_paths]):
|
||||
# Or use a system-wide installation.
|
||||
tmp_paths = [shutil.which(t) for t in osm_tool_names]
|
||||
if all([is_executable(t) for t in tmp_paths]):
|
||||
osm_tool_paths = dict(zip(osm_tool_names, tmp_paths))
|
||||
logger.info(f"Found osmctools at {', '.join(osm_tool_paths.values())}")
|
||||
return osm_tool_paths
|
||||
|
||||
logger.info(f"osmctools are not found, building from the sources into {path}...")
|
||||
os.makedirs(path, exist_ok=True)
|
||||
return build_osmtools(settings.OSM_TOOLS_SRC_PATH)
|
||||
|
||||
def setup_borders(self):
|
||||
temp_borders = self.paths.generation_borders_path
|
||||
borders = PathProvider.borders_path()
|
||||
for x in self.countries:
|
||||
if x in WORLDS_NAMES:
|
||||
continue
|
||||
|
||||
poly = f"{x}.poly"
|
||||
make_symlink(os.path.join(borders, poly), os.path.join(temp_borders, poly))
|
||||
make_symlink(temp_borders, os.path.join(self.paths.draft_path, "borders"))
|
||||
|
||||
def setup_osm2ft(self):
|
||||
for x in os.listdir(self.paths.osm2ft_path):
|
||||
p = os.path.join(self.paths.osm2ft_path, x)
|
||||
if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
|
||||
shutil.move(p, os.path.join(self.paths.mwm_path, x))
|
||||
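A hedged sketch of constructing Env by hand; in practice this is done by the maps_generator entry point, and the country list and suffix here are placeholders:

env = Env(
    countries=["Belarus", "Estonia"],   # None means "all countries from borders"
    production=False,
    build_suffix="test",                # becomes part of the build name
)
# ... stages are run against env, see Generation in generation.py below ...
env.finish()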
58
tools/python/maps_generator/generator/exceptions.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import os
|
||||
import subprocess
|
||||
|
||||
class MapsGeneratorError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class OptionNotFound(MapsGeneratorError):
|
||||
pass
|
||||
|
||||
|
||||
class ValidationError(MapsGeneratorError):
|
||||
pass
|
||||
|
||||
|
||||
class ContinueError(MapsGeneratorError):
|
||||
pass
|
||||
|
||||
|
||||
class SkipError(MapsGeneratorError):
|
||||
pass
|
||||
|
||||
|
||||
class BadExitStatusError(MapsGeneratorError):
|
||||
pass
|
||||
|
||||
|
||||
class ParseError(MapsGeneratorError):
|
||||
pass
|
||||
|
||||
|
||||
class FailedTest(MapsGeneratorError):
|
||||
pass
|
||||
|
||||
|
||||
def wait_and_raise_if_fail(p):
|
||||
if p.wait() != os.EX_OK:
|
||||
if type(p) is subprocess.Popen:
|
||||
args = p.args
|
||||
stdout = p.stdout
|
||||
stderr = p.stderr
|
||||
logs = None
|
||||
errors = None
|
||||
            if stdout is not None:
|
||||
logs = stdout.read(256).decode()
|
||||
            if stderr is not None:
|
||||
errors = stderr.read(256).decode()
|
||||
            if errors is not None and errors != logs:
|
||||
logs += " and " + errors
|
||||
msg = f"The launch of {args.pop(0)} failed.\nArguments used: {' '.join(args)}\nSee details in {logs}"
|
||||
raise BadExitStatusError(msg)
|
||||
else:
|
||||
args = p.args
|
||||
logs = p.output.name
|
||||
if p.error.name != logs:
|
||||
logs += " and " + p.error.name
|
||||
msg = f"The launch of {args.pop(0)} failed.\nArguments used: {' '.join(args)}\nSee details in {logs}"
|
||||
raise BadExitStatusError(msg)
|
||||
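A minimal usage sketch for wait_and_raise_if_fail with a plain subprocess.Popen (the command is a placeholder):

import subprocess

p = subprocess.Popen(
    ["echo", "hello"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
# Returns normally on exit code 0; otherwise raises BadExitStatusError
# with a snippet of the captured output in the message.
wait_and_raise_if_fail(p)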
162
tools/python/maps_generator/generator/gen_tool.py
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
import copy
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from maps_generator.generator.exceptions import OptionNotFound
|
||||
from maps_generator.generator.exceptions import ValidationError
|
||||
from maps_generator.generator.exceptions import wait_and_raise_if_fail
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
||||
|
||||
class GenTool:
|
||||
OPTIONS = {
|
||||
"dump_cities_boundaries": bool,
|
||||
"emit_coasts": bool,
|
||||
"fail_on_coasts": bool,
|
||||
"generate_cameras": bool,
|
||||
"generate_cities_boundaries": bool,
|
||||
"generate_cities_ids": bool,
|
||||
"generate_features": bool,
|
||||
"generate_geo_objects_features": bool,
|
||||
"generate_geo_objects_index": bool,
|
||||
"generate_geometry": bool,
|
||||
"generate_index": bool,
|
||||
"generate_isolines_info": bool,
|
||||
"generate_maxspeed": bool,
|
||||
"generate_packed_borders": bool,
|
||||
"generate_popular_places": bool,
|
||||
"generate_region_features": bool,
|
||||
"generate_regions": bool,
|
||||
"generate_regions_kv": bool,
|
||||
"generate_search_index": bool,
|
||||
"generate_traffic_keys": bool,
|
||||
"generate_world": bool,
|
||||
"have_borders_for_whole_world": bool,
|
||||
"make_city_roads": bool,
|
||||
"make_coasts": bool,
|
||||
"make_cross_mwm": bool,
|
||||
"make_routing_index": bool,
|
||||
"make_transit_cross_mwm": bool,
|
||||
"make_transit_cross_mwm_experimental": bool,
|
||||
"preprocess": bool,
|
||||
"split_by_polygons": bool,
|
||||
"stats_types": bool,
|
||||
"version": bool,
|
||||
"threads_count": int,
|
||||
"booking_data": str,
|
||||
"promo_catalog_cities": str,
|
||||
"brands_data": str,
|
||||
"brands_translations_data": str,
|
||||
"cache_path": str,
|
||||
"cities_boundaries_data": str,
|
||||
"data_path": str,
|
||||
"dump_wikipedia_urls": str,
|
||||
"geo_objects_features": str,
|
||||
"geo_objects_key_value": str,
|
||||
"ids_without_addresses": str,
|
||||
"idToWikidata": str,
|
||||
"intermediate_data_path": str,
|
||||
"isolines_path": str,
|
||||
"addresses_path": str,
|
||||
"nodes_list_path": str,
|
||||
"node_storage": str,
|
||||
"osm_file_name": str,
|
||||
"osm_file_type": str,
|
||||
"output": str,
|
||||
"planet_version": str,
|
||||
"popular_places_data": str,
|
||||
"regions_features": str,
|
||||
"regions_index": str,
|
||||
"regions_key_value": str,
|
||||
"srtm_path": str,
|
||||
"transit_path": str,
|
||||
"transit_path_experimental": str,
|
||||
"world_roads_path": str,
|
||||
"ugc_data": str,
|
||||
"uk_postcodes_dataset": str,
|
||||
"us_postcodes_dataset": str,
|
||||
"user_resource_path": str,
|
||||
"wikipedia_pages": str,
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self, name_executable, out=subprocess.DEVNULL, err=subprocess.DEVNULL, **options
|
||||
):
|
||||
self.name_executable = name_executable
|
||||
self.subprocess = None
|
||||
self.output = out
|
||||
self.error = err
|
||||
self.options = {"threads_count": 1}
|
||||
self.logger = logger
|
||||
self.add_options(**options)
|
||||
|
||||
@property
|
||||
def args(self):
|
||||
return self._collect_cmd()
|
||||
|
||||
def add_options(self, **options):
|
||||
if "logger" in options:
|
||||
self.logger = options["logger"]
|
||||
|
||||
for k, v in options.items():
|
||||
if k == "logger":
|
||||
continue
|
||||
|
||||
if k not in GenTool.OPTIONS:
|
||||
                raise OptionNotFound(f"{k} is not an available option")
|
||||
|
||||
if type(v) is not GenTool.OPTIONS[k]:
|
||||
raise ValidationError(
|
||||
f"{k} required {str(GenTool.OPTIONS[k])},"
|
||||
f" but not {str(type(v))}"
|
||||
)
|
||||
|
||||
self.options[k] = str(v).lower() if type(v) is bool else v
|
||||
return self
|
||||
|
||||
def run_async(self):
|
||||
assert self.subprocess is None, "You forgot to call wait()"
|
||||
cmd = self._collect_cmd()
|
||||
self.subprocess = subprocess.Popen(
|
||||
cmd, stdout=self.output, stderr=self.error, env=os.environ
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
f"Run generator tool [{self.get_build_version()}]:" f" {' '.join(cmd)} "
|
||||
)
|
||||
return self
|
||||
|
||||
def wait(self):
|
||||
code = self.subprocess.wait()
|
||||
self.subprocess = None
|
||||
return code
|
||||
|
||||
def run(self):
|
||||
self.run_async()
|
||||
wait_and_raise_if_fail(self)
|
||||
|
||||
def branch(self):
|
||||
c = GenTool(self.name_executable, out=self.output, err=self.error)
|
||||
c.options = copy.deepcopy(self.options)
|
||||
return c
|
||||
|
||||
def get_build_version(self):
|
||||
p = subprocess.Popen(
|
||||
[self.name_executable, "--version"],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
env=os.environ,
|
||||
)
|
||||
wait_and_raise_if_fail(p)
|
||||
out, err = p.communicate()
|
||||
return out.decode("utf-8").replace("\n", " ").strip()
|
||||
|
||||
def _collect_cmd(self):
|
||||
options = ["".join(["--", k, "=", str(v)]) for k, v in self.options.items()]
|
||||
return [self.name_executable, *options]
|
||||
|
||||
|
||||
def run_gen_tool(*args, **kwargs):
|
||||
GenTool(*args, **kwargs).run()
|
||||
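A hedged usage sketch for GenTool; the paths are placeholders, and only keys listed in OPTIONS above are accepted (unknown keys raise OptionNotFound, wrong types raise ValidationError):

gen = GenTool(
    "/path/to/generator_tool",                     # placeholder executable
    data_path="/data/build/intermediate_data",
    user_resource_path="/data/omim/data",
    preprocess=True,
)
gen.add_options(threads_count=4)
gen.run()                                          # raises BadExitStatusError on failure

# branch() clones the collected options into a new instance.
gen.branch().add_options(generate_features=True).run()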
151
tools/python/maps_generator/generator/generation.py
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
import os
|
||||
from typing import AnyStr
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
from typing import Type
|
||||
from typing import Union
|
||||
|
||||
import filelock
|
||||
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.generator.exceptions import ContinueError
|
||||
from maps_generator.generator.stages import Stage
|
||||
from maps_generator.generator.stages import get_stage_name
|
||||
from maps_generator.generator.stages import stages
|
||||
from maps_generator.generator.status import Status
|
||||
from maps_generator.generator.status import without_stat_ext
|
||||
|
||||
|
||||
class Generation:
|
||||
"""
|
||||
    Generation describes the process of a maps generation. It contains stages.
|
||||
|
||||
For example:
|
||||
generation = Generation(env)
|
||||
generation.add_stage(s1)
|
||||
generation.add_stage(s2)
|
||||
generation.run()
|
||||
"""
|
||||
|
||||
def __init__(self, env: Env, build_lock: bool = True):
|
||||
self.env: Env = env
|
||||
self.stages: List[Stage] = []
|
||||
self.runnable_stages: Optional[List[Stage]] = None
|
||||
self.build_lock: bool = build_lock
|
||||
|
||||
for country_stage in stages.countries_stages:
|
||||
if self.is_skipped_stage(country_stage):
|
||||
self.env.add_skipped_stage(country_stage)
|
||||
|
||||
for stage in stages.stages:
|
||||
if self.is_skipped_stage(stage):
|
||||
self.env.add_skipped_stage(stage)
|
||||
|
||||
def is_skipped_stage(self, stage: Union[Type[Stage], Stage]) -> bool:
|
||||
return (
|
||||
stage.is_production_only and not self.env.production
|
||||
) or not self.env.is_accepted_stage(stage)
|
||||
|
||||
def add_stage(self, stage: Stage):
|
||||
self.stages.append(stage)
|
||||
if self.is_skipped_stage(stage):
|
||||
self.env.add_skipped_stage(stage)
|
||||
|
||||
def pre_run(self):
|
||||
skipped = set()
|
||||
|
||||
def traverse(current: Type[Stage]):
|
||||
deps = stages.dependencies.get(current, [])
|
||||
for d in deps:
|
||||
skipped.add(d)
|
||||
traverse(d)
|
||||
|
||||
for skipped_stage in self.env.skipped_stages:
|
||||
traverse(skipped_stage)
|
||||
|
||||
for s in skipped:
|
||||
self.env.add_skipped_stage(s)
|
||||
|
||||
self.runnable_stages = [s for s in self.stages if self.env.is_accepted_stage(s)]
|
||||
|
||||
def run(self, from_stage: Optional[AnyStr] = None):
|
||||
self.pre_run()
|
||||
if from_stage is not None:
|
||||
self.reset_to_stage(from_stage)
|
||||
|
||||
if self.build_lock:
|
||||
lock_filename = f"{os.path.join(self.env.paths.build_path, 'lock')}.lock"
|
||||
with filelock.FileLock(lock_filename, timeout=1):
|
||||
self.run_stages()
|
||||
else:
|
||||
self.run_stages()
|
||||
|
||||
def run_stages(self):
|
||||
for stage in self.runnable_stages:
|
||||
stage(self.env)
|
||||
|
||||
def reset_to_stage(self, stage_name: AnyStr):
|
||||
"""
|
||||
Resets generation state to stage_name.
|
||||
        Status files are overwritten with new statuses according to stage_name.
|
||||
        It assumes that the stages are arranged as follows:
|
||||
stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN
|
||||
"""
|
||||
high_level_stages = [get_stage_name(s) for s in self.runnable_stages]
|
||||
if not (
|
||||
stage_name in high_level_stages
|
||||
or any(stage_name == get_stage_name(s) for s in stages.countries_stages)
|
||||
):
|
||||
raise ContinueError(f"{stage_name} not in {', '.join(high_level_stages)}.")
|
||||
|
||||
if not os.path.exists(self.env.paths.status_path):
|
||||
raise ContinueError(f"Status path {self.env.paths.status_path} not found.")
|
||||
|
||||
if not os.path.exists(self.env.paths.main_status_path):
|
||||
raise ContinueError(
|
||||
f"Status file {self.env.paths.main_status_path} not found."
|
||||
)
|
||||
|
||||
countries_statuses_paths = []
|
||||
countries = set(self.env.countries)
|
||||
for f in os.listdir(self.env.paths.status_path):
|
||||
full_name = os.path.join(self.env.paths.status_path, f)
|
||||
if (
|
||||
os.path.isfile(full_name)
|
||||
and full_name != self.env.paths.main_status_path
|
||||
and without_stat_ext(f) in countries
|
||||
):
|
||||
countries_statuses_paths.append(full_name)
|
||||
|
||||
def set_countries_stage(st):
|
||||
for path in countries_statuses_paths:
|
||||
Status(path, st).update_status()
|
||||
|
||||
def finish_countries_stage():
|
||||
for path in countries_statuses_paths:
|
||||
Status(path).finish()
|
||||
|
||||
def index(l: List, val):
|
||||
try:
|
||||
return l.index(val)
|
||||
except ValueError:
|
||||
return -1
|
||||
|
||||
mwm_stage_name = get_stage_name(stages.mwm_stage)
|
||||
stage_mwm_index = index(high_level_stages, mwm_stage_name)
|
||||
|
||||
main_status = None
|
||||
if (
|
||||
stage_mwm_index == -1
|
||||
or stage_name in high_level_stages[: stage_mwm_index + 1]
|
||||
):
|
||||
main_status = stage_name
|
||||
set_countries_stage("")
|
||||
elif stage_name in high_level_stages[stage_mwm_index + 1 :]:
|
||||
main_status = stage_name
|
||||
finish_countries_stage()
|
||||
else:
|
||||
main_status = get_stage_name(stages.mwm_stage)
|
||||
set_countries_stage(stage_name)
|
||||
|
||||
Status(self.env.paths.main_status_path, main_status).update_status()
|
||||
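A sketch of a typical wiring, assuming concrete Stage subclasses from stages_declaration.py (StageFoo and StageBar are placeholders, not real stage names):

from maps_generator.generator import stages_declaration as sd

env = Env(countries=["Belarus"])
generation = Generation(env)
generation.add_stage(sd.StageFoo())      # placeholder stage
generation.add_stage(sd.StageBar())      # placeholder stage

generation.run()
# To resume an interrupted build, pass the stage name without the "Stage" prefix;
# reset_to_stage() rewrites the status files accordingly:
# generation.run(from_stage="Bar")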
121
tools/python/maps_generator/generator/osmtools.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
import os
|
||||
import subprocess
|
||||
|
||||
from maps_generator.generator import settings
|
||||
from maps_generator.generator.exceptions import BadExitStatusError
|
||||
from maps_generator.generator.exceptions import wait_and_raise_if_fail
|
||||
|
||||
|
||||
def build_osmtools(path, output=subprocess.DEVNULL, error=subprocess.DEVNULL):
|
||||
src = {
|
||||
settings.OSM_TOOL_UPDATE: "osmupdate.c",
|
||||
settings.OSM_TOOL_FILTER: "osmfilter.c",
|
||||
settings.OSM_TOOL_CONVERT: "osmconvert.c",
|
||||
}
|
||||
ld_flags = ("-lz",)
|
||||
cc = []
|
||||
result = {}
|
||||
for executable, src in src.items():
|
||||
out = os.path.join(settings.OSM_TOOLS_PATH, executable)
|
||||
op = [
|
||||
settings.OSM_TOOLS_CC,
|
||||
*settings.OSM_TOOLS_CC_FLAGS,
|
||||
"-o",
|
||||
out,
|
||||
os.path.join(path, src),
|
||||
*ld_flags,
|
||||
]
|
||||
s = subprocess.Popen(op, stdout=output, stderr=error)
|
||||
cc.append(s)
|
||||
result[executable] = out
|
||||
|
||||
messages = []
|
||||
for c in cc:
|
||||
if c.wait() != os.EX_OK:
|
||||
messages.append(f"The launch of {' '.join(c.args)} failed.")
|
||||
if messages:
|
||||
raise BadExitStatusError("\n".join(messages))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def osmconvert(
|
||||
name_executable,
|
||||
in_file,
|
||||
out_file,
|
||||
output=subprocess.DEVNULL,
|
||||
error=subprocess.DEVNULL,
|
||||
run_async=False,
|
||||
**kwargs,
|
||||
):
|
||||
env = os.environ.copy()
|
||||
env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}"
|
||||
p = subprocess.Popen(
|
||||
[
|
||||
name_executable,
|
||||
in_file,
|
||||
"--drop-author",
|
||||
"--drop-version",
|
||||
"--out-o5m",
|
||||
f"-o={out_file}",
|
||||
],
|
||||
env=env,
|
||||
stdout=output,
|
||||
stderr=error,
|
||||
)
|
||||
if run_async:
|
||||
return p
|
||||
else:
|
||||
wait_and_raise_if_fail(p)
|
||||
|
||||
|
||||
def osmupdate(
|
||||
name_executable,
|
||||
in_file,
|
||||
out_file,
|
||||
output=subprocess.DEVNULL,
|
||||
error=subprocess.DEVNULL,
|
||||
run_async=False,
|
||||
**kwargs,
|
||||
):
|
||||
env = os.environ.copy()
|
||||
env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}"
|
||||
p = subprocess.Popen(
|
||||
[
|
||||
name_executable,
|
||||
"--drop-author",
|
||||
"--drop-version",
|
||||
"--out-o5m",
|
||||
"-v",
|
||||
in_file,
|
||||
out_file,
|
||||
],
|
||||
env=env,
|
||||
stdout=output,
|
||||
stderr=error,
|
||||
)
|
||||
if run_async:
|
||||
return p
|
||||
else:
|
||||
wait_and_raise_if_fail(p)
|
||||
|
||||
|
||||
def osmfilter(
|
||||
name_executable,
|
||||
in_file,
|
||||
out_file,
|
||||
output=subprocess.DEVNULL,
|
||||
error=subprocess.DEVNULL,
|
||||
run_async=False,
|
||||
**kwargs,
|
||||
):
|
||||
env = os.environ.copy()
|
||||
env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}"
|
||||
args = [name_executable, in_file, f"-o={out_file}"] + [
|
||||
f"--{k.replace('_', '-')}={v}" for k, v in kwargs.items()
|
||||
]
|
||||
p = subprocess.Popen(args, env=env, stdout=output, stderr=error)
|
||||
if run_async:
|
||||
return p
|
||||
else:
|
||||
wait_and_raise_if_fail(p)
|
||||
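A short sketch of the keyword-to-flag translation performed by osmfilter above: each extra keyword argument becomes --key=value with underscores replaced by dashes (file names and the filter expression are placeholders):

osmfilter(
    "osmfilter",                  # resolved via OSM_TOOLS_PATH or the system PATH
    "planet.o5m",
    "coastlines.o5m",
    keep="natural=coastline",     # passed to the tool as --keep=natural=coastline
)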
333
tools/python/maps_generator/generator/settings.py
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
import argparse
|
||||
import multiprocessing
|
||||
import os
|
||||
import site
|
||||
import sys
|
||||
from configparser import ConfigParser
|
||||
from configparser import ExtendedInterpolation
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import AnyStr
|
||||
|
||||
from maps_generator.utils.md5 import md5_ext
|
||||
from maps_generator.utils.system import total_virtual_memory
|
||||
|
||||
ETC_DIR = os.path.join(os.path.dirname(__file__), "..", "var", "etc")
|
||||
|
||||
parser = argparse.ArgumentParser(add_help=False)
|
||||
opt_config = "--config"
|
||||
parser.add_argument(opt_config, type=str, default="", help="Path to config")
|
||||
|
||||
|
||||
def get_config_path(config_path: AnyStr):
|
||||
"""
|
||||
    It tries to get the value of the opt_config command line option.
|
||||
    If the option is absent, the MM_GEN__CONFIG environment variable is used, then config_path.
|
||||
"""
|
||||
argv = sys.argv
|
||||
indexes = (-1, -1)
|
||||
for i, opt in enumerate(argv):
|
||||
if opt.startswith(f"{opt_config}="):
|
||||
indexes = (i, i + 1)
|
||||
if opt == opt_config:
|
||||
indexes = (i, i + 2)
|
||||
|
||||
config_args = argv[indexes[0] : indexes[1]]
|
||||
if config_args:
|
||||
return parser.parse_args(config_args).config
|
||||
|
||||
    config_var = os.environ.get("MM_GEN__CONFIG")
|
||||
return config_path if config_var is None else config_var
|
||||
|
||||
|
||||
class CfgReader:
|
||||
"""
|
||||
Config reader.
|
||||
    There are 3 ways of getting an option. In priority order:
|
||||
1. From system env.
|
||||
2. From config.
|
||||
3. From default values.
|
||||
|
||||
    To set an option through the system environment, build the variable name as
|
||||
    MM_GEN__ + [SECTION_NAME] + _ + [VALUE_NAME].
|
||||
"""
|
||||
|
||||
def __init__(self, default_settings_path: AnyStr):
|
||||
self.config = ConfigParser(interpolation=ExtendedInterpolation())
|
||||
self.config.read([get_config_path(default_settings_path)])
|
||||
|
||||
def get_opt(self, s: AnyStr, v: AnyStr, default: Any = None):
|
||||
val = CfgReader._get_env_val(s, v)
|
||||
if val is not None:
|
||||
return val
|
||||
|
||||
return self.config.get(s, v) if self.config.has_option(s, v) else default
|
||||
|
||||
def get_opt_path(self, s: AnyStr, v: AnyStr, default: AnyStr = ""):
|
||||
return os.path.expanduser(self.get_opt(s, v, default))
|
||||
|
||||
@staticmethod
|
||||
def _get_env_val(s: AnyStr, v: AnyStr):
|
||||
return os.environ.get(f"MM_GEN__{s.upper()}_{v.upper()}")
|
||||
|
||||
|
||||
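A small sketch of the precedence described above; the config path and the exported value are placeholders:

# Given a config file containing
#   [Main]
#   MAIN_OUT_PATH = ~/generation
# an exported variable still wins:
#   export MM_GEN__MAIN_MAIN_OUT_PATH=/data/generation
cfg = CfgReader("/path/to/map_generator.ini")
cfg.get_opt_path("Main", "MAIN_OUT_PATH", "~/generation")  # -> "/data/generation"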
DEFAULT_PLANET_URL = "https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf"
|
||||
|
||||
# Main section:
|
||||
# If DEBUG is True, a little special planet is downloaded.
|
||||
DEBUG = True
|
||||
_HOME_PATH = str(Path.home())
|
||||
_WORK_PATH = _HOME_PATH
|
||||
TMPDIR = os.path.join(_HOME_PATH, "tmp")
|
||||
MAIN_OUT_PATH = os.path.join(_WORK_PATH, "generation")
|
||||
CACHE_PATH = ""
|
||||
|
||||
# Developer section:
|
||||
BUILD_PATH = os.path.join(_WORK_PATH, "omim-build-relwithdebinfo")
|
||||
OMIM_PATH = os.path.join(_WORK_PATH, "omim")
|
||||
|
||||
# Osm tools section:
|
||||
OSM_TOOLS_SRC_PATH = os.path.join(OMIM_PATH, "tools", "osmctools")
|
||||
OSM_TOOLS_PATH = os.path.join(_WORK_PATH, "osmctools")
|
||||
|
||||
# Generator tool section:
|
||||
USER_RESOURCE_PATH = os.path.join(OMIM_PATH, "data")
|
||||
NODE_STORAGE = "map"
|
||||
|
||||
# Stages section:
|
||||
NEED_PLANET_UPDATE = False
|
||||
THREADS_COUNT_FEATURES_STAGE = multiprocessing.cpu_count()
|
||||
DATA_ARCHIVE_DIR = ""
|
||||
DIFF_VERSION_DEPTH = 2
|
||||
|
||||
# Logging section:
|
||||
LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log")
|
||||
|
||||
# External resources section:
|
||||
PLANET_URL = DEFAULT_PLANET_URL
|
||||
PLANET_COASTS_URL = ""
|
||||
UGC_URL = ""
|
||||
HOTELS_URL = ""
|
||||
PROMO_CATALOG_CITIES_URL = ""
|
||||
PROMO_CATALOG_COUNTRIES_URL = ""
|
||||
POPULARITY_URL = ""
|
||||
SUBWAY_URL = ""
|
||||
TRANSIT_URL = ""
|
||||
NEED_BUILD_WORLD_ROADS = True
|
||||
FOOD_URL = ""
|
||||
FOOD_TRANSLATIONS_URL = ""
|
||||
UK_POSTCODES_URL = ""
|
||||
US_POSTCODES_URL = ""
|
||||
SRTM_PATH = ""
|
||||
ISOLINES_PATH = ""
|
||||
ADDRESSES_PATH = ""
|
||||
|
||||
# Stats section:
|
||||
STATS_TYPES_CONFIG = os.path.join(ETC_DIR, "stats_types_config.txt")
|
||||
|
||||
# Other variables:
|
||||
PLANET = "planet"
|
||||
POSSIBLE_GEN_TOOL_NAMES = ("generator_tool", "omim-generator_tool")
|
||||
VERSION_FILE_NAME = "version.txt"
|
||||
|
||||
# Osm tools:
|
||||
OSM_TOOL_CONVERT = "osmconvert"
|
||||
OSM_TOOL_FILTER = "osmfilter"
|
||||
OSM_TOOL_UPDATE = "osmupdate"
|
||||
OSM_TOOLS_CC = "cc"
|
||||
OSM_TOOLS_CC_FLAGS = [
|
||||
"-O3",
|
||||
]
|
||||
|
||||
# Planet and coasts:
|
||||
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
|
||||
PLANET_COASTS_RAWGEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.rawgeom")
|
||||
|
||||
# Common:
|
||||
THREADS_COUNT = multiprocessing.cpu_count()
|
||||
|
||||
# for lib logging
|
||||
LOGGING = {
|
||||
"version": 1,
|
||||
"disable_existing_loggers": False,
|
||||
"formatters": {
|
||||
"standard": {"format": "[%(asctime)s] %(levelname)s %(module)s %(message)s"},
|
||||
},
|
||||
"handlers": {
|
||||
"stdout": {
|
||||
"level": "INFO",
|
||||
"class": "logging.StreamHandler",
|
||||
"formatter": "standard",
|
||||
},
|
||||
"file": {
|
||||
"level": "DEBUG",
|
||||
"class": "logging.handlers.WatchedFileHandler",
|
||||
"formatter": "standard",
|
||||
"filename": LOG_FILE_PATH,
|
||||
},
|
||||
},
|
||||
"loggers": {
|
||||
"maps_generator": {
|
||||
"handlers": ["stdout", "file"],
|
||||
"level": "DEBUG",
|
||||
"propagate": True,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def init(default_settings_path: AnyStr):
|
||||
    # Try to read a config and override the default settings.
|
||||
cfg = CfgReader(default_settings_path)
|
||||
|
||||
# Main section:
|
||||
global DEBUG
|
||||
global TMPDIR
|
||||
global MAIN_OUT_PATH
|
||||
global CACHE_PATH
|
||||
_DEBUG = cfg.get_opt("Main", "DEBUG")
|
||||
DEBUG = DEBUG if _DEBUG is None else int(_DEBUG)
|
||||
TMPDIR = cfg.get_opt_path("Main", "TMPDIR", TMPDIR)
|
||||
MAIN_OUT_PATH = cfg.get_opt_path("Main", "MAIN_OUT_PATH", MAIN_OUT_PATH)
|
||||
CACHE_PATH = cfg.get_opt_path("Main", "CACHE_PATH", CACHE_PATH)
|
||||
|
||||
# Developer section:
|
||||
global BUILD_PATH
|
||||
global OMIM_PATH
|
||||
BUILD_PATH = cfg.get_opt_path("Developer", "BUILD_PATH", BUILD_PATH)
|
||||
OMIM_PATH = cfg.get_opt_path("Developer", "OMIM_PATH", OMIM_PATH)
|
||||
|
||||
# Osm tools section:
|
||||
global OSM_TOOLS_SRC_PATH
|
||||
global OSM_TOOLS_PATH
|
||||
OSM_TOOLS_SRC_PATH = cfg.get_opt_path(
|
||||
"Osm tools", "OSM_TOOLS_SRC_PATH", OSM_TOOLS_SRC_PATH
|
||||
)
|
||||
OSM_TOOLS_PATH = cfg.get_opt_path("Osm tools", "OSM_TOOLS_PATH", OSM_TOOLS_PATH)
|
||||
|
||||
# Generator tool section:
|
||||
global USER_RESOURCE_PATH
|
||||
global NODE_STORAGE
|
||||
USER_RESOURCE_PATH = cfg.get_opt_path(
|
||||
"Generator tool", "USER_RESOURCE_PATH", USER_RESOURCE_PATH
|
||||
)
|
||||
NODE_STORAGE = cfg.get_opt("Generator tool", "NODE_STORAGE", NODE_STORAGE)
|
||||
|
||||
    assert os.path.exists(OMIM_PATH), f"Can't find OMIM_PATH (set to {OMIM_PATH})"
|
||||
|
||||
if not os.path.exists(USER_RESOURCE_PATH):
|
||||
from data_files import find_data_files
|
||||
|
||||
USER_RESOURCE_PATH = find_data_files("omim-data")
|
||||
assert USER_RESOURCE_PATH is not None
|
||||
|
||||
import borders
|
||||
|
||||
    # Note: if maps_generator is installed as a system package and
|
||||
    # borders.init() is called for the first time, the call might
|
||||
    # return False because root permissions are required.
|
||||
assert borders.init()
|
||||
|
||||
# Stages section:
|
||||
global NEED_PLANET_UPDATE
|
||||
global DATA_ARCHIVE_DIR
|
||||
global DIFF_VERSION_DEPTH
|
||||
global THREADS_COUNT_FEATURES_STAGE
|
||||
NEED_PLANET_UPDATE = cfg.get_opt("Stages", "NEED_PLANET_UPDATE", NEED_PLANET_UPDATE)
|
||||
DATA_ARCHIVE_DIR = cfg.get_opt_path(
|
||||
"Stages", "DATA_ARCHIVE_DIR", DATA_ARCHIVE_DIR
|
||||
)
|
||||
DIFF_VERSION_DEPTH = int(cfg.get_opt(
|
||||
"Stages", "DIFF_VERSION_DEPTH", DIFF_VERSION_DEPTH
|
||||
))
|
||||
|
||||
threads_count = int(
|
||||
cfg.get_opt(
|
||||
"Generator tool",
|
||||
"THREADS_COUNT_FEATURES_STAGE",
|
||||
THREADS_COUNT_FEATURES_STAGE,
|
||||
)
|
||||
)
|
||||
if threads_count > 0:
|
||||
THREADS_COUNT_FEATURES_STAGE = threads_count
|
||||
|
||||
# Logging section:
|
||||
global LOG_FILE_PATH
|
||||
global LOGGING
|
||||
LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log")
|
||||
LOG_FILE_PATH = cfg.get_opt_path("Logging", "MAIN_LOG", LOG_FILE_PATH)
|
||||
os.makedirs(os.path.dirname(os.path.abspath(LOG_FILE_PATH)), exist_ok=True)
|
||||
LOGGING["handlers"]["file"]["filename"] = LOG_FILE_PATH
|
||||
|
||||
# External section:
|
||||
global PLANET_URL
|
||||
global PLANET_MD5_URL
|
||||
global PLANET_COASTS_URL
|
||||
global UGC_URL
|
||||
global HOTELS_URL
|
||||
global PROMO_CATALOG_CITIES_URL
|
||||
global PROMO_CATALOG_COUNTRIES_URL
|
||||
global POPULARITY_URL
|
||||
global SUBWAY_URL
|
||||
global TRANSIT_URL
|
||||
global NEED_BUILD_WORLD_ROADS
|
||||
global FOOD_URL
|
||||
global UK_POSTCODES_URL
|
||||
global US_POSTCODES_URL
|
||||
global FOOD_TRANSLATIONS_URL
|
||||
global SRTM_PATH
|
||||
global ISOLINES_PATH
|
||||
global ADDRESSES_PATH
|
||||
|
||||
PLANET_URL = cfg.get_opt_path("External", "PLANET_URL", PLANET_URL)
|
||||
PLANET_MD5_URL = cfg.get_opt_path("External", "PLANET_MD5_URL", md5_ext(PLANET_URL))
|
||||
PLANET_COASTS_URL = cfg.get_opt_path(
|
||||
"External", "PLANET_COASTS_URL", PLANET_COASTS_URL
|
||||
)
|
||||
UGC_URL = cfg.get_opt_path("External", "UGC_URL", UGC_URL)
|
||||
HOTELS_URL = cfg.get_opt_path("External", "HOTELS_URL", HOTELS_URL)
|
||||
PROMO_CATALOG_CITIES_URL = cfg.get_opt_path(
|
||||
"External", "PROMO_CATALOG_CITIES_URL", PROMO_CATALOG_CITIES_URL
|
||||
)
|
||||
PROMO_CATALOG_COUNTRIES_URL = cfg.get_opt_path(
|
||||
"External", "PROMO_CATALOG_COUNTRIES_URL", PROMO_CATALOG_COUNTRIES_URL
|
||||
)
|
||||
POPULARITY_URL = cfg.get_opt_path("External", "POPULARITY_URL", POPULARITY_URL)
|
||||
SUBWAY_URL = cfg.get_opt("External", "SUBWAY_URL", SUBWAY_URL)
|
||||
TRANSIT_URL = cfg.get_opt("External", "TRANSIT_URL", TRANSIT_URL)
|
||||
NEED_BUILD_WORLD_ROADS = cfg.get_opt("External", "NEED_BUILD_WORLD_ROADS", NEED_BUILD_WORLD_ROADS)
|
||||
FOOD_URL = cfg.get_opt("External", "FOOD_URL", FOOD_URL)
|
||||
|
||||
UK_POSTCODES_URL = cfg.get_opt("External", "UK_POSTCODES_URL", UK_POSTCODES_URL)
|
||||
US_POSTCODES_URL = cfg.get_opt("External", "US_POSTCODES_URL", US_POSTCODES_URL)
|
||||
FOOD_TRANSLATIONS_URL = cfg.get_opt(
|
||||
"External", "FOOD_TRANSLATIONS_URL", FOOD_TRANSLATIONS_URL
|
||||
)
|
||||
SRTM_PATH = cfg.get_opt_path("External", "SRTM_PATH", SRTM_PATH)
|
||||
ISOLINES_PATH = cfg.get_opt_path("External", "ISOLINES_PATH", ISOLINES_PATH)
|
||||
ADDRESSES_PATH = cfg.get_opt_path("External", "ADDRESSES_PATH", ADDRESSES_PATH)
|
||||
|
||||
# Stats section:
|
||||
global STATS_TYPES_CONFIG
|
||||
STATS_TYPES_CONFIG = cfg.get_opt_path(
|
||||
"Stats", "STATS_TYPES_CONFIG", STATS_TYPES_CONFIG
|
||||
)
|
||||
|
||||
# Common:
|
||||
global THREADS_COUNT
|
||||
threads_count = int(cfg.get_opt("Common", "THREADS_COUNT", THREADS_COUNT))
|
||||
if threads_count > 0:
|
||||
THREADS_COUNT = threads_count
|
||||
|
||||
    # Planet and coasts:
|
||||
global PLANET_COASTS_GEOM_URL
|
||||
global PLANET_COASTS_RAWGEOM_URL
|
||||
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
|
||||
PLANET_COASTS_RAWGEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.rawgeom")
|
||||
|
||||
if DEBUG:
|
||||
PLANET_URL = "https://www.dropbox.com/s/m3ru5tnj8g9u4cz/planet-latest.o5m?raw=1"
|
||||
PLANET_MD5_URL = (
|
||||
"https://www.dropbox.com/s/8wdl2hy22jgisk5/planet-latest.o5m.md5?raw=1"
|
||||
)
|
||||
NEED_PLANET_UPDATE = False
|
||||
380
tools/python/maps_generator/generator/stages.py
Normal file
|
|
@ -0,0 +1,380 @@
|
|||
""""
|
||||
This file contains some decorators that define stages.
|
||||
There are two main types of stages:
|
||||
1. outer_stage - a high level stage
|
||||
2. country_stage - a stage that applies to country files (*.mwm).
|
||||
|
||||
A country_stage can run inside an outer stage; the country stages run inside mwm_stage.
|
||||
mwm_stage is the only stage that contains country_stages.
|
||||
"""
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from abc import ABC
|
||||
from abc import abstractmethod
|
||||
from collections import defaultdict
|
||||
from typing import AnyStr
|
||||
from typing import Callable
|
||||
from typing import Dict
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
from typing import Type
|
||||
from typing import Union
|
||||
|
||||
import filelock
|
||||
|
||||
from maps_generator.generator import status
|
||||
from maps_generator.generator.exceptions import FailedTest
|
||||
from maps_generator.utils.file import download_files
|
||||
from maps_generator.utils.file import normalize_url_to_path_dict
|
||||
from maps_generator.utils.log import DummyObject
|
||||
from maps_generator.utils.log import create_file_handler
|
||||
from maps_generator.utils.log import create_file_logger
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
||||
|
||||
class InternalDependency:
|
||||
def __init__(self, url, path_method, mode=""):
|
||||
self.url = url
|
||||
self.path_method = path_method
|
||||
self.mode = mode
|
||||
|
||||
|
||||
class Test:
|
||||
def __init__(self, test, need_run=None, is_pretest=False):
|
||||
self._test = test
|
||||
self._need_run = need_run
|
||||
self.is_pretest = is_pretest
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self._test.__name__
|
||||
|
||||
def need_run(self, env, _logger):
|
||||
if self._need_run is None:
|
||||
return True
|
||||
|
||||
if callable(self._need_run):
|
||||
return self._need_run(env, _logger)
|
||||
|
||||
return self._need_run
|
||||
|
||||
def test(self, env, _logger, *args, **kwargs):
|
||||
try:
|
||||
res = self._test(env, _logger, *args, **kwargs)
|
||||
except Exception as e:
|
||||
            raise FailedTest(f"Test {self.name} failed.") from e
|
||||
|
||||
if not res:
|
||||
            raise FailedTest(f"Test {self.name} failed.")
|
||||
|
||||
_logger.info(f"Test {self.name} is successfully completed.")
|
||||
|
||||
|
||||
class Stage(ABC):
|
||||
need_planet_lock = False
|
||||
need_build_lock = False
|
||||
is_helper = False
|
||||
is_mwm_stage = False
|
||||
is_production_only = False
|
||||
|
||||
def __init__(self, **args):
|
||||
self.args = args
|
||||
|
||||
def __call__(self, env: "Env"):
|
||||
return self.apply(env, **self.args)
|
||||
|
||||
@abstractmethod
|
||||
def apply(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
|
||||
def get_stage_name(stage: Union[Type[Stage], Stage]) -> AnyStr:
|
||||
n = stage.__class__.__name__ if isinstance(stage, Stage) else stage.__name__
|
||||
return n.replace("Stage", "")
|
||||
|
||||
|
||||
def get_stage_type(stage: Union[Type[Stage], AnyStr]):
|
||||
from . import stages_declaration as sd
|
||||
|
||||
if isinstance(stage, str):
|
||||
if not stage.startswith("Stage"):
|
||||
stage = f"Stage{stage}"
|
||||
return getattr(sd, stage)
|
||||
|
||||
return stage
|
||||
|
||||
|
||||
class Stages:
|
||||
"""Stages class is used for storing all stages."""
|
||||
|
||||
def __init__(self):
|
||||
self.mwm_stage: Optional[Type[Stage]] = None
|
||||
self.countries_stages: List[Type[Stage]] = []
|
||||
self.stages: List[Type[Stage]] = []
|
||||
self.helper_stages: List[Type[Stage]] = []
|
||||
self.dependencies = defaultdict(set)
|
||||
|
||||
def init(self):
|
||||
# We normalize self.dependencies to Dict[Type[Stage], Set[Type[Stage]]].
|
||||
dependencies = defaultdict(set)
|
||||
for k, v in self.dependencies.items():
|
||||
dependencies[get_stage_type(k)] = set(get_stage_type(x) for x in v)
|
||||
self.dependencies = dependencies
|
||||
|
||||
def set_mwm_stage(self, stage: Type[Stage]):
|
||||
assert self.mwm_stage is None
|
||||
self.mwm_stage = stage
|
||||
|
||||
def add_helper_stage(self, stage: Type[Stage]):
|
||||
self.helper_stages.append(stage)
|
||||
|
||||
def add_country_stage(self, stage: Type[Stage]):
|
||||
self.countries_stages.append(stage)
|
||||
|
||||
def add_stage(self, stage: Type[Stage]):
|
||||
self.stages.append(stage)
|
||||
|
||||
def add_dependency_for(self, stage: Type[Stage], *deps):
|
||||
for dep in deps:
|
||||
self.dependencies[stage].add(dep)
|
||||
|
||||
def get_invisible_stages_names(self) -> List[AnyStr]:
|
||||
return [get_stage_name(st) for st in self.helper_stages]
|
||||
|
||||
def get_visible_stages_names(self) -> List[AnyStr]:
|
||||
"""Returns all stages names except helper stages names."""
|
||||
stages = []
|
||||
for s in self.stages:
|
||||
stages.append(get_stage_name(s))
|
||||
if s == self.mwm_stage:
|
||||
stages += [get_stage_name(st) for st in self.countries_stages]
|
||||
return stages
|
||||
|
||||
def is_valid_stage_name(self, stage_name) -> bool:
|
||||
return get_stage_name(self.mwm_stage) == stage_name or any(
|
||||
any(stage_name == get_stage_name(x) for x in c)
|
||||
for c in [self.countries_stages, self.stages, self.helper_stages]
|
||||
)
|
||||
|
||||
|
||||
# The global variable stages contains all possible stages.
|
||||
stages = Stages()
|
||||
|
||||
|
||||
def outer_stage(stage: Type[Stage]) -> Type[Stage]:
|
||||
"""It's decorator that defines high level stage."""
|
||||
if stage.is_helper:
|
||||
stages.add_helper_stage(stage)
|
||||
else:
|
||||
stages.add_stage(stage)
|
||||
if stage.is_mwm_stage:
|
||||
stages.set_mwm_stage(stage)
|
||||
|
||||
def new_apply(method):
|
||||
def apply(obj: Stage, env: "Env", *args, **kwargs):
|
||||
name = get_stage_name(obj)
|
||||
logfile = os.path.join(env.paths.log_path, f"{name}.log")
|
||||
log_handler = create_file_handler(logfile)
|
||||
logger.addHandler(log_handler)
|
||||
# This message is used as an anchor for parsing logs.
|
||||
# See maps_generator/checks/logs/logs_reader.py STAGE_START_MSG_PATTERN
|
||||
logger.info(f"Stage {name}: start ...")
|
||||
t = time.time()
|
||||
try:
|
||||
if not env.is_accepted_stage(stage):
|
||||
logger.info(f"Stage {name} was not accepted.")
|
||||
return
|
||||
|
||||
main_status = env.main_status
|
||||
main_status.init(env.paths.main_status_path, name)
|
||||
if main_status.need_skip():
|
||||
logger.warning(f"Stage {name} was skipped.")
|
||||
return
|
||||
|
||||
main_status.update_status()
|
||||
env.set_subprocess_out(log_handler.stream)
|
||||
method(obj, env, *args, **kwargs)
|
||||
finally:
|
||||
d = time.time() - t
|
||||
# This message is used as an anchor for parsing logs.
|
||||
# See maps_generator/checks/logs/logs_reader.py STAGE_FINISH_MSG_PATTERN
|
||||
logger.info(
|
||||
f"Stage {name}: finished in {str(datetime.timedelta(seconds=d))}"
|
||||
)
|
||||
logger.removeHandler(log_handler)
|
||||
|
||||
return apply
|
||||
|
||||
stage.apply = new_apply(stage.apply)
|
||||
return stage
|
||||
|
||||
|
||||
def country_stage_status(stage: Type[Stage]) -> Type[Stage]:
|
||||
"""It's helper decorator that works with status file."""
|
||||
|
||||
def new_apply(method):
|
||||
def apply(obj: Stage, env: "Env", country: AnyStr, *args, **kwargs):
|
||||
name = get_stage_name(obj)
|
||||
_logger = DummyObject()
|
||||
countries_meta = env.countries_meta
|
||||
if "logger" in countries_meta[country]:
|
||||
_logger, _ = countries_meta[country]["logger"]
|
||||
|
||||
if not env.is_accepted_stage(stage):
|
||||
_logger.info(f"Stage {name} was not accepted.")
|
||||
return
|
||||
|
||||
if "status" not in countries_meta[country]:
|
||||
countries_meta[country]["status"] = status.Status()
|
||||
|
||||
country_status = countries_meta[country]["status"]
|
||||
status_file = os.path.join(
|
||||
env.paths.status_path, status.with_stat_ext(country)
|
||||
)
|
||||
country_status.init(status_file, name)
|
||||
if country_status.need_skip():
|
||||
_logger.warning(f"Stage {name} was skipped.")
|
||||
return
|
||||
|
||||
country_status.update_status()
|
||||
method(obj, env, country, *args, **kwargs)
|
||||
|
||||
return apply
|
||||
|
||||
stage.apply = new_apply(stage.apply)
|
||||
return stage
|
||||
|
||||
|
||||
def country_stage_log(stage: Type[Stage]) -> Type[Stage]:
|
||||
"""It's helper decorator that works with log file."""
|
||||
|
||||
def new_apply(method):
|
||||
def apply(obj: Stage, env: "Env", country: AnyStr, *args, **kwargs):
|
||||
name = get_stage_name(obj)
|
||||
log_file = os.path.join(env.paths.log_path, f"{country}.log")
|
||||
countries_meta = env.countries_meta
|
||||
if "logger" not in countries_meta[country]:
|
||||
countries_meta[country]["logger"] = create_file_logger(log_file)
|
||||
|
||||
_logger, log_handler = countries_meta[country]["logger"]
|
||||
# This message is used as an anchor for parsing logs.
|
||||
# See maps_generator/checks/logs/logs_reader.py STAGE_START_MSG_PATTERN
|
||||
_logger.info(f"Stage {name}: start ...")
|
||||
t = time.time()
|
||||
env.set_subprocess_out(log_handler.stream, country)
|
||||
method(obj, env, country, *args, logger=_logger, **kwargs)
|
||||
d = time.time() - t
|
||||
# This message is used as an anchor for parsing logs.
|
||||
# See maps_generator/checks/logs/logs_reader.py STAGE_FINISH_MSG_PATTERN
|
||||
_logger.info(
|
||||
f"Stage {name}: finished in {str(datetime.timedelta(seconds=d))}"
|
||||
)
|
||||
|
||||
return apply
|
||||
|
||||
stage.apply = new_apply(stage.apply)
|
||||
return stage
|
||||
|
||||
|
||||
def test_stage(*tests: Test) -> Callable[[Type[Stage],], Type[Stage]]:
|
||||
def new_apply(method):
|
||||
def apply(obj: Stage, env: "Env", *args, **kwargs):
|
||||
_logger = kwargs["logger"] if "logger" in kwargs else logger
|
||||
|
||||
def run_tests(tests):
|
||||
for test in tests:
|
||||
if test.need_run(env, _logger):
|
||||
test.test(env, _logger, *args, **kwargs)
|
||||
else:
|
||||
_logger.info(f"Test {test.name} was skipped.")
|
||||
|
||||
run_tests(filter(lambda t: t.is_pretest, tests))
|
||||
method(obj, env, *args, **kwargs)
|
||||
run_tests(filter(lambda t: not t.is_pretest, tests))
|
||||
|
||||
return apply
|
||||
|
||||
def wrapper(stage: Type[Stage]) -> Type[Stage]:
|
||||
stage.apply = new_apply(stage.apply)
|
||||
return stage
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
def country_stage(stage: Type[Stage]) -> Type[Stage]:
|
||||
"""It's decorator that defines country stage."""
|
||||
if stage.is_helper:
|
||||
stages.add_helper_stage(stage)
|
||||
else:
|
||||
stages.add_country_stage(stage)
|
||||
|
||||
return country_stage_log(country_stage_status(stage))
|
||||
|
||||
|
||||
def mwm_stage(stage: Type[Stage]) -> Type[Stage]:
|
||||
stage.is_mwm_stage = True
|
||||
return stage
|
||||
|
||||
|
||||
def production_only(stage: Type[Stage]) -> Type[Stage]:
|
||||
stage.is_production_only = True
|
||||
return stage
|
||||
|
||||
|
||||
def helper_stage_for(*deps) -> Callable[[Type[Stage],], Type[Stage]]:
|
||||
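# Usage example (see stages_declaration.py): @helper_stage_for("StageDescriptions").
# It links the decorated stage to the listed stages via stages.add_dependency_for
# and marks it as a helper stage.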
def wrapper(stage: Type[Stage]) -> Type[Stage]:
|
||||
stages.add_dependency_for(stage, *deps)
|
||||
stage.is_helper = True
|
||||
return stage
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
def depends_from_internal(*deps) -> Callable[[Type[Stage],], Type[Stage]]:
|
||||
def get_urls(
|
||||
env: "Env", internal_dependencies: List[InternalDependency]
|
||||
) -> Dict[AnyStr, AnyStr]:
|
||||
deps = {}
|
||||
for d in internal_dependencies:
|
||||
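# Skip production-only dependencies ("p" in mode) outside of production runs,
# and skip dependencies that have no URL configured.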
if "p" in d.mode and not env.production or not d.url:
|
||||
continue
|
||||
|
||||
path = None
|
||||
if type(d.path_method) is property:
|
||||
path = d.path_method.__get__(env.paths)
|
||||
|
||||
assert path is not None, type(d.path_method)
|
||||
deps[d.url] = path
|
||||
|
||||
return deps
|
||||
|
||||
def download_under_lock(env: "Env", urls: Dict[AnyStr, AnyStr], stage_name: AnyStr):
|
||||
lock_name = f"{os.path.join(env.paths.status_path, stage_name)}.lock"
|
||||
status_name = f"{os.path.join(env.paths.status_path, stage_name)}.download"
|
||||
with filelock.FileLock(lock_name):
|
||||
s = status.Status(status_name)
|
||||
if not s.is_finished():
|
||||
urls = normalize_url_to_path_dict(urls)
|
||||
download_files(urls, env.force_download_files)
|
||||
s.finish()
|
||||
|
||||
def new_apply(method):
|
||||
def apply(obj: Stage, env: "Env", *args, **kwargs):
|
||||
if hasattr(obj, "internal_dependencies") and obj.internal_dependencies:
|
||||
urls = get_urls(env, obj.internal_dependencies)
|
||||
if urls:
|
||||
download_under_lock(env, urls, get_stage_name(obj))
|
||||
|
||||
method(obj, env, *args, **kwargs)
|
||||
|
||||
return apply
|
||||
|
||||
def wrapper(stage: Type[Stage]) -> Type[Stage]:
|
||||
stage.internal_dependencies = deps
|
||||
stage.apply = new_apply(stage.apply)
|
||||
return stage
|
||||
|
||||
return wrapper
|
||||
446
tools/python/maps_generator/generator/stages_declaration.py
Normal file
446
tools/python/maps_generator/generator/stages_declaration.py
Normal file
|
|
@ -0,0 +1,446 @@
|
|||
""""
|
||||
This file contains possible stages that maps_generator can run.
|
||||
Some algorithms assume the maps generation process looks like:
|
||||
stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN
|
||||
Only stage_mwm can contain country stages (country_stage_1, ..., country_stage_M).
|
||||
"""
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import multiprocessing
|
||||
import os
|
||||
import shutil
|
||||
import tarfile
|
||||
import errno
|
||||
from collections import defaultdict
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import AnyStr
|
||||
from typing import Type
|
||||
|
||||
import maps_generator.generator.diffs as diffs
|
||||
import maps_generator.generator.stages_tests as st
|
||||
# from descriptions.descriptions_downloader import check_and_get_checker
|
||||
# from descriptions.descriptions_downloader import download_from_wikidata_tags
|
||||
# from descriptions.descriptions_downloader import download_from_wikipedia_tags
|
||||
from maps_generator.generator import coastline
|
||||
from maps_generator.generator import settings
|
||||
from maps_generator.generator import steps
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.generator.env import PathProvider
|
||||
from maps_generator.generator.env import WORLD_COASTS_NAME
|
||||
from maps_generator.generator.env import WORLD_NAME
|
||||
from maps_generator.generator.exceptions import BadExitStatusError
|
||||
from maps_generator.generator.gen_tool import run_gen_tool
|
||||
from maps_generator.generator.stages import InternalDependency as D
|
||||
from maps_generator.generator.stages import Stage
|
||||
from maps_generator.generator.stages import Test
|
||||
from maps_generator.generator.stages import country_stage
|
||||
from maps_generator.generator.stages import depends_from_internal
|
||||
from maps_generator.generator.stages import helper_stage_for
|
||||
from maps_generator.generator.stages import mwm_stage
|
||||
from maps_generator.generator.stages import outer_stage
|
||||
from maps_generator.generator.stages import production_only
|
||||
from maps_generator.generator.stages import test_stage
|
||||
from maps_generator.generator.statistics import get_stages_info
|
||||
from maps_generator.utils.file import download_files
|
||||
from maps_generator.utils.file import is_verified
|
||||
from post_generation.hierarchy_to_countries import hierarchy_to_countries
|
||||
from post_generation.inject_promo_ids import inject_promo_ids
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
||||
|
||||
def is_accepted(env: Env, stage: Type[Stage]) -> bool:
|
||||
return env.is_accepted_stage(stage)
|
||||
|
||||
|
||||
@outer_stage
|
||||
class StageDownloadAndConvertPlanet(Stage):
|
||||
def apply(self, env: Env, force_download: bool = True, **kwargs):
|
||||
if force_download or not is_verified(env.paths.planet_o5m):
|
||||
steps.step_download_and_convert_planet(
|
||||
env, force_download=force_download, **kwargs
|
||||
)
|
||||
|
||||
|
||||
@outer_stage
|
||||
class StageUpdatePlanet(Stage):
|
||||
def apply(self, env: Env, **kwargs):
|
||||
steps.step_update_planet(env, **kwargs)
|
||||
|
||||
|
||||
@outer_stage
|
||||
class StageCoastline(Stage):
|
||||
def apply(self, env: Env, use_old_if_fail=True):
|
||||
coasts_geom = "WorldCoasts.geom"
|
||||
coasts_rawgeom = "WorldCoasts.rawgeom"
|
||||
try:
|
||||
coastline.make_coastline(env)
|
||||
except BadExitStatusError as e:
|
||||
if not use_old_if_fail:
|
||||
raise e
|
||||
|
||||
logger.warning("Build coasts failed. Try to download the coasts...")
|
||||
download_files(
|
||||
{
|
||||
settings.PLANET_COASTS_GEOM_URL: os.path.join(
|
||||
env.paths.coastline_path, coasts_geom
|
||||
),
|
||||
settings.PLANET_COASTS_RAWGEOM_URL: os.path.join(
|
||||
env.paths.coastline_path, coasts_rawgeom
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
for f in [coasts_geom, coasts_rawgeom]:
|
||||
path = os.path.join(env.paths.coastline_path, f)
|
||||
shutil.copy2(path, env.paths.intermediate_data_path)
|
||||
|
||||
|
||||
@outer_stage
|
||||
class StagePreprocess(Stage):
|
||||
def apply(self, env: Env, **kwargs):
|
||||
steps.step_preprocess(env, **kwargs)
|
||||
|
||||
|
||||
@outer_stage
|
||||
@depends_from_internal(
|
||||
D(settings.HOTELS_URL, PathProvider.hotels_path, "p"),
|
||||
D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"),
|
||||
D(settings.POPULARITY_URL, PathProvider.popularity_path, "p"),
|
||||
D(settings.FOOD_URL, PathProvider.food_paths, "p"),
|
||||
D(settings.FOOD_TRANSLATIONS_URL, PathProvider.food_translations_path, "p"),
|
||||
)
|
||||
@test_stage(
|
||||
Test(st.make_test_booking_data(max_days=7), lambda e, _: e.production, True)
|
||||
)
|
||||
class StageFeatures(Stage):
|
||||
def apply(self, env: Env):
|
||||
extra = {}
|
||||
if is_accepted(env, StageDescriptions):
|
||||
extra.update({"idToWikidata": env.paths.id_to_wikidata_path})
|
||||
if env.production:
|
||||
extra.update(
|
||||
{
|
||||
"booking_data": env.paths.hotels_path,
|
||||
"promo_catalog_cities": env.paths.promo_catalog_cities_path,
|
||||
"popular_places_data": env.paths.popularity_path,
|
||||
"brands_data": env.paths.food_paths,
|
||||
"brands_translations_data": env.paths.food_translations_path,
|
||||
}
|
||||
)
|
||||
if is_accepted(env, StageCoastline):
|
||||
extra.update({"emit_coasts": True})
|
||||
if is_accepted(env, StageIsolinesInfo):
|
||||
extra.update({"isolines_path": PathProvider.isolines_path()})
|
||||
extra.update({"addresses_path": PathProvider.addresses_path()})
|
||||
|
||||
steps.step_features(env, **extra)
|
||||
if os.path.exists(env.paths.packed_polygons_path):
|
||||
shutil.copy2(env.paths.packed_polygons_path, env.paths.mwm_path)
|
||||
|
||||
|
||||
@outer_stage
|
||||
@helper_stage_for("StageDescriptions")
|
||||
class StageDownloadDescriptions(Stage):
|
||||
def apply(self, env: Env):
|
||||
"""
|
||||
run_gen_tool(
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(),
|
||||
err=env.get_subprocess_out(),
|
||||
data_path=env.paths.data_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
dump_wikipedia_urls=env.paths.wiki_url_path,
|
||||
idToWikidata=env.paths.id_to_wikidata_path,
|
||||
threads_count=settings.THREADS_COUNT,
|
||||
)
|
||||
|
||||
# https://en.wikipedia.org/wiki/Wikipedia:Multilingual_statistics
|
||||
langs = ("en", "de", "fr", "es", "ru", "tr")
|
||||
checker = check_and_get_checker(env.paths.popularity_path)
|
||||
download_from_wikipedia_tags(
|
||||
env.paths.wiki_url_path, env.paths.descriptions_path, langs, checker
|
||||
)
|
||||
download_from_wikidata_tags(
|
||||
env.paths.id_to_wikidata_path, env.paths.descriptions_path, langs, checker
|
||||
)
|
||||
"""
|
||||
|
||||
# The src folder is hardcoded here and must exist on the map building machine
|
||||
src = "/home/planet/wikipedia/descriptions"
|
||||
# The dest folder will generally become build/*/intermediate_data/descriptions
|
||||
dest = env.paths.descriptions_path
|
||||
# A missing source folder is a serious problem
|
||||
try:
|
||||
if os.path.isdir(src):
|
||||
print("Found %s" % (src))
|
||||
else:
|
||||
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), src)
|
||||
except OSError as e:
|
||||
print("rmtree error: %s - %s" % (e.filename, e.strerror))
|
||||
# Empty folder "descriptions" can be already created.
|
||||
try:
|
||||
if os.path.isdir(dest):
|
||||
shutil.rmtree(dest)
|
||||
else:
|
||||
os.remove(dest)
|
||||
except OSError as e:
|
||||
print("rmtree error: %s - %s" % (e.filename, e.strerror))
|
||||
|
||||
os.symlink(src, dest)
|
||||
|
||||
|
||||
@outer_stage
|
||||
@mwm_stage
|
||||
class StageMwm(Stage):
|
||||
def apply(self, env: Env):
|
||||
tmp_mwm_names = env.get_tmp_mwm_names()
|
||||
if tmp_mwm_names:
|
||||
logger.info(f'Number of feature data .mwm.tmp country files to process: {len(tmp_mwm_names)}')
|
||||
with ThreadPoolExecutor(settings.THREADS_COUNT) as pool:
|
||||
pool.map(
|
||||
lambda c: StageMwm.make_mwm(c, env),
|
||||
tmp_mwm_names
|
||||
)
|
||||
else:
|
||||
# TODO: list all countries that were not found?
|
||||
logger.warning(f'There are no feature data .mwm.tmp country files to process in {env.paths.intermediate_tmp_path}!')
|
||||
logger.warning('Are the countries requested for generation missing from the supplied planet file?')
|
||||
|
||||
@staticmethod
|
||||
def make_mwm(country: AnyStr, env: Env):
|
||||
logger.info(f'Starting mwm generation for {country}')
|
||||
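# World and WorldCoasts get dedicated stage lists; every other mwm runs the full
# per-country pipeline defined in mwm_stages below.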
world_stages = {
|
||||
WORLD_NAME: [
|
||||
StageIndex,
|
||||
StageCitiesIdsWorld,
|
||||
StagePopularityWorld,
|
||||
StagePrepareRoutingWorld,
|
||||
StageRoutingWorld,
|
||||
StageMwmStatistics,
|
||||
],
|
||||
WORLD_COASTS_NAME: [StageIndex, StageMwmStatistics],
|
||||
}
|
||||
|
||||
mwm_stages = [
|
||||
StageIndex,
|
||||
StageUgc,
|
||||
StageSrtm,
|
||||
StageIsolinesInfo,
|
||||
StageDescriptions,
|
||||
# Must run after StageDescriptions.
|
||||
StagePopularity,
|
||||
StageRouting,
|
||||
StageRoutingTransit,
|
||||
StageMwmDiffs,
|
||||
StageMwmStatistics,
|
||||
]
|
||||
|
||||
for stage in world_stages.get(country, mwm_stages):
|
||||
logger.info(f'{country} mwm stage {stage.__name__}: start...')
|
||||
stage(country=country)(env)
|
||||
|
||||
env.finish_mwm(country)
|
||||
logger.info(f'Finished mwm generation for {country}')
|
||||
|
||||
|
||||
@country_stage
|
||||
class StageIndex(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
if country == WORLD_NAME:
|
||||
steps.step_index_world(env, country, **kwargs)
|
||||
elif country == WORLD_COASTS_NAME:
|
||||
steps.step_coastline_index(env, country, **kwargs)
|
||||
else:
|
||||
kwargs.update(
|
||||
{
|
||||
"uk_postcodes_dataset": settings.UK_POSTCODES_URL,
|
||||
"us_postcodes_dataset": settings.US_POSTCODES_URL,
|
||||
}
|
||||
)
|
||||
steps.step_index(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
@production_only
|
||||
class StageCitiesIdsWorld(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_cities_ids_world(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
@helper_stage_for("StageRoutingWorld")
|
||||
# ToDo: Are we sure that this stage will be skipped if StageRoutingWorld is skipped?
|
||||
class StagePrepareRoutingWorld(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_prepare_routing_world(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
class StageRoutingWorld(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_routing_world(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
@depends_from_internal(D(settings.UGC_URL, PathProvider.ugc_path),)
|
||||
@production_only
|
||||
class StageUgc(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_ugc(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
class StagePopularity(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_popularity(env, country, **kwargs)
|
||||
|
||||
@country_stage
|
||||
class StagePopularityWorld(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_popularity_world(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
class StageSrtm(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_srtm(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
class StageIsolinesInfo(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_isolines_info(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
class StageDescriptions(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_description(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
class StageRouting(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_routing(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
@depends_from_internal(
|
||||
D(settings.SUBWAY_URL, PathProvider.subway_path),
|
||||
D(settings.TRANSIT_URL, PathProvider.transit_path_experimental),
|
||||
)
|
||||
class StageRoutingTransit(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_routing_transit(env, country, **kwargs)
|
||||
|
||||
|
||||
@country_stage
|
||||
class StageMwmDiffs(Stage):
|
||||
def apply(self, env: Env, country, logger, **kwargs):
|
||||
data_dir = diffs.DataDir(
|
||||
diff_tool=env.diff_tool,
|
||||
mwm_name=f"{country}.mwm",
|
||||
new_version_dir=env.paths.mwm_path,
|
||||
old_version_root_dir=settings.DATA_ARCHIVE_DIR,
|
||||
)
|
||||
diffs.mwm_diff_calculation(data_dir, logger, depth=settings.DIFF_VERSION_DEPTH)
|
||||
|
||||
|
||||
@country_stage
|
||||
@helper_stage_for("StageStatistics")
|
||||
class StageMwmStatistics(Stage):
|
||||
def apply(self, env: Env, country, **kwargs):
|
||||
steps.step_statistics(env, country, **kwargs)
|
||||
|
||||
|
||||
@outer_stage
|
||||
@depends_from_internal(
|
||||
D(
|
||||
settings.PROMO_CATALOG_COUNTRIES_URL,
|
||||
PathProvider.promo_catalog_countries_path,
|
||||
"p",
|
||||
),
|
||||
D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"),
|
||||
)
|
||||
class StageCountriesTxt(Stage):
|
||||
def apply(self, env: Env):
|
||||
countries = hierarchy_to_countries(
|
||||
env.paths.old_to_new_path,
|
||||
env.paths.borders_to_osm_path,
|
||||
env.paths.countries_synonyms_path,
|
||||
env.paths.hierarchy_path,
|
||||
env.paths.mwm_path,
|
||||
env.paths.mwm_version,
|
||||
)
|
||||
if env.production:
|
||||
inject_promo_ids(
|
||||
countries,
|
||||
env.paths.promo_catalog_cities_path,
|
||||
env.paths.promo_catalog_countries_path,
|
||||
env.paths.mwm_path,
|
||||
env.paths.types_path,
|
||||
env.paths.mwm_path,
|
||||
)
|
||||
|
||||
with open(env.paths.counties_txt_path, "w") as f:
|
||||
json.dump(countries, f, ensure_ascii=False, indent=1)
|
||||
|
||||
|
||||
@outer_stage
|
||||
@production_only
|
||||
class StageLocalAds(Stage):
|
||||
def apply(self, env: Env):
|
||||
create_csv(
|
||||
env.paths.localads_path,
|
||||
env.paths.mwm_path,
|
||||
env.paths.mwm_path,
|
||||
env.mwm_version,
|
||||
multiprocessing.cpu_count(),
|
||||
)
|
||||
with tarfile.open(f"{env.paths.localads_path}.tar.gz", "w:gz") as tar:
|
||||
for filename in os.listdir(env.paths.localads_path):
|
||||
tar.add(os.path.join(env.paths.localads_path, filename), arcname=filename)
|
||||
|
||||
|
||||
@outer_stage
|
||||
class StageStatistics(Stage):
|
||||
def apply(self, env: Env):
|
||||
steps_info = get_stages_info(env.paths.log_path, {"statistics"})
|
||||
stats = defaultdict(lambda: defaultdict(dict))
|
||||
stats["steps"] = steps_info["steps"]
|
||||
for country in env.get_tmp_mwm_names():
|
||||
with open(os.path.join(env.paths.stats_path, f"{country}.json")) as f:
|
||||
stats["countries"][country] = {
|
||||
"types": json.load(f),
|
||||
"steps": steps_info["countries"][country],
|
||||
}
|
||||
|
||||
def default(o):
|
||||
if isinstance(o, datetime.timedelta):
|
||||
return str(o)
|
||||
|
||||
with open(os.path.join(env.paths.stats_path, "stats.json"), "w") as f:
|
||||
json.dump(
|
||||
stats, f, ensure_ascii=False, sort_keys=True, indent=2, default=default
|
||||
)
|
||||
|
||||
|
||||
@outer_stage
|
||||
class StageCleanup(Stage):
|
||||
def apply(self, env: Env):
|
||||
logger.info(
|
||||
f"osm2ft files will be moved from {env.paths.mwm_path} "
|
||||
f"to {env.paths.osm2ft_path}."
|
||||
)
|
||||
for x in os.listdir(env.paths.mwm_path):
|
||||
p = os.path.join(env.paths.mwm_path, x)
|
||||
if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
|
||||
shutil.move(p, os.path.join(env.paths.osm2ft_path, x))
|
||||
|
||||
logger.info(f"{env.paths.draft_path} will be removed.")
|
||||
shutil.rmtree(env.paths.draft_path)
|
||||
|
||||
27
tools/python/maps_generator/generator/stages_tests.py
Normal file
27
tools/python/maps_generator/generator/stages_tests.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
import os
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
from maps_generator.generator import settings
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.utils.file import download_file
|
||||
|
||||
|
||||
def make_test_booking_data(max_days):
|
||||
def test_booking_data(env: Env, logger, *args, **kwargs):
|
||||
if not settings.HOTELS_URL:
|
||||
return None
|
||||
base_url, _ = settings.HOTELS_URL.rsplit("/", maxsplit=1)
|
||||
url = f"{base_url}/meta.json"
|
||||
meta_path = os.path.join(env.paths.tmp_dir(), "hotels-meta.json")
|
||||
|
||||
download_file(url, meta_path)
|
||||
|
||||
with open(meta_path) as f:
|
||||
meta = json.load(f)
|
||||
raw_date = meta["latest"].strip()
|
||||
logger.info(f"Booking date is from {raw_date}.")
|
||||
dt = datetime.strptime(raw_date, "%Y_%m_%d-%H_%M_%S")
|
||||
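# Accept the hotels dump only if it is at most max_days older than the build date (env.dt).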
return (env.dt - dt).days < max_days
|
||||
|
||||
return test_booking_data
|
||||
185
tools/python/maps_generator/generator/statistics.py
Normal file
185
tools/python/maps_generator/generator/statistics.py
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from typing import AnyStr
|
||||
from typing import Dict
|
||||
from typing import List
|
||||
|
||||
from maps_generator.generator.env import WORLDS_NAMES
|
||||
from maps_generator.generator.exceptions import ParseError
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
||||
# Parse entries, written by ./generator/statistics.cpp PrintTypeStats.
|
||||
RE_STAT = re.compile(
|
||||
r"([\w:-]+): "
|
||||
r"size = +\d+; "
|
||||
r"features = +(\d+); "
|
||||
r"length = +([0-9.e+-]+) m; "
|
||||
r"area = +([0-9.e+-]+) m²; "
|
||||
r"w\/names = +(\d+)"
|
||||
)
|
||||
|
||||
RE_TIME_DELTA = re.compile(
|
||||
r"^(?:(?P<days>-?\d+) (days?, )?)?"
|
||||
r"((?:(?P<hours>-?\d+):)(?=\d+:\d+))?"
|
||||
r"(?:(?P<minutes>-?\d+):)?"
|
||||
r"(?P<seconds>-?\d+)"
|
||||
r"(?:\.(?P<microseconds>\d{1,6})\d{0,6})?$"
|
||||
)
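# Matches durations produced by str(datetime.timedelta), e.g. "1:10:42.287364"
# or "2 days, 0:00:05.824967".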
|
||||
|
||||
RE_FINISH_STAGE = re.compile(r"(.*)Stage (.+): finished in (.+)$")
|
||||
|
||||
|
||||
def read_stat(f):
|
||||
stats = []
|
||||
for line in f:
|
||||
m = RE_STAT.match(line)
|
||||
# Skip explanation header strings.
|
||||
if m is None:
|
||||
continue
|
||||
|
||||
stats.append(
|
||||
{
|
||||
"name": m.group(1),
|
||||
"cnt": int(m.group(2)),
|
||||
"len": float(m.group(3)),
|
||||
"area": float(m.group(4)),
|
||||
"names": int(m.group(5)),
|
||||
}
|
||||
)
|
||||
return stats
|
||||
|
||||
|
||||
def read_config(f):
|
||||
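# Each non-comment line is expected to look like
# "<type regex>;<len|area|cnt|cnt_names>;<display name>": the first column is
# compiled to a regex and the second is lower-cased.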
config = []
|
||||
for line in f:
|
||||
l = line.strip()
|
||||
if l.startswith("#") or not l:
|
||||
continue
|
||||
columns = [c.strip() for c in l.split(";", 2)]
|
||||
columns[0] = re.compile(columns[0])
|
||||
columns[1] = columns[1].lower()
|
||||
config.append(columns)
|
||||
return config
|
||||
|
||||
|
||||
def process_stat(config, stats):
|
||||
result = {}
|
||||
for param in config:
|
||||
res = 0
|
||||
for t in stats:
|
||||
if param[0].match(t["name"]):
|
||||
if param[1] == "len":
|
||||
res += t["len"]
|
||||
elif param[1] == "area":
|
||||
res += t["area"]
|
||||
elif param[1] == "cnt_names":
|
||||
res += t["names"]
|
||||
else:
|
||||
res += t["cnt"]
|
||||
result[str(param[0]) + param[1]] = res
|
||||
return result
|
||||
|
||||
|
||||
def format_res(res, t):
|
||||
unit = None
|
||||
if t == "len":
|
||||
unit = "m"
|
||||
elif t == "area":
|
||||
unit = "m²"
|
||||
elif t == "cnt" or t == "cnt_names":
|
||||
unit = "pc"
|
||||
else:
|
||||
raise ParseError(f"Unknown type {t}.")
|
||||
|
||||
return res, unit
|
||||
|
||||
|
||||
def make_stats(config_path, stats_path):
|
||||
with open(config_path) as f:
|
||||
config = read_config(f)
|
||||
with open(stats_path) as f:
|
||||
stats = process_stat(config, read_stat(f))
|
||||
lines = []
|
||||
for param in config:
|
||||
k = str(param[0]) + param[1]
|
||||
st = format_res(stats[k], param[1])
|
||||
lines.append({"type": param[2], "quantity": st[0], "unit": st[1]})
|
||||
return lines
|
||||
|
||||
|
||||
def parse_time(time_str):
|
||||
parts = RE_TIME_DELTA.match(time_str)
|
||||
if not parts:
|
||||
return
|
||||
parts = parts.groupdict()
|
||||
time_params = {}
|
||||
for name, param in parts.items():
|
||||
if param:
|
||||
time_params[name] = int(param)
|
||||
return datetime.timedelta(**time_params)
|
||||
|
||||
|
||||
def get_stages_info(log_path, ignored_stages=frozenset()):
|
||||
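# Files named "stage_*" are treated as outer stage logs; any other file is a
# per-country log and is keyed by the file name up to the first dot.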
result = defaultdict(lambda: defaultdict(dict))
|
||||
for file in os.listdir(log_path):
|
||||
path = os.path.join(log_path, file)
|
||||
with open(path) as f:
|
||||
for line in f:
|
||||
m = RE_FINISH_STAGE.match(line)
|
||||
if not m:
|
||||
continue
|
||||
stage_name = m.group(2)
|
||||
dt = parse_time(m.group(3))
|
||||
if file.startswith("stage_") and stage_name not in ignored_stages:
|
||||
result["stages"][stage_name] = dt
|
||||
else:
|
||||
country = file.split(".")[0]
|
||||
result["countries"][country][stage_name] = dt
|
||||
return result
|
||||
|
||||
|
||||
def read_types(path: AnyStr) -> Dict[AnyStr, Dict]:
|
||||
""""
|
||||
Reads and summarizes statistics for all countries, excluding World and
|
||||
WorldCoasts.
|
||||
"""
|
||||
with open(path) as f:
|
||||
json_data = json.load(f)
|
||||
all_types = {}
|
||||
countries = json_data["countries"]
|
||||
for country, json_value in countries.items():
|
||||
if country in WORLDS_NAMES:
|
||||
continue
|
||||
try:
|
||||
json_types = json_value["types"]
|
||||
except KeyError:
|
||||
logger.exception(f"Cannot parse {json_value}")
|
||||
continue
|
||||
for t in json_types:
|
||||
curr = all_types.get(t["type"], {})
|
||||
curr["quantity"] = curr.get("quantity", 0.0) + t["quantity"]
|
||||
curr["unit"] = t["unit"]
|
||||
all_types[t["type"]] = curr
|
||||
return all_types
|
||||
|
||||
|
||||
def diff(new: Dict[AnyStr, Dict], old: Dict[AnyStr, Dict]) -> List:
|
||||
assert len(new) == len(old)
|
||||
lines = []
|
||||
for key in new:
|
||||
o = old[key]["quantity"]
|
||||
n = new[key]["quantity"]
|
||||
rel = 0
|
||||
if o != 0.0:
|
||||
rel = int(((n - o) / o) * 100)
|
||||
else:
|
||||
if n != 0.0:
|
||||
rel = 100
|
||||
|
||||
lines.append((key, o, n, rel, n - o, new[key]["unit"],))
|
||||
return lines
|
||||
53
tools/python/maps_generator/generator/status.py
Normal file
53
tools/python/maps_generator/generator/status.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import os
|
||||
from typing import AnyStr
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def with_stat_ext(country: AnyStr):
|
||||
return f"{country}.status"
|
||||
|
||||
|
||||
def without_stat_ext(status: AnyStr):
|
||||
return status.replace(".status", "")
|
||||
|
||||
|
||||
class Status:
|
||||
"""Status is used for recovering and continuation maps generation."""
|
||||
|
||||
def __init__(
|
||||
self, stat_path: Optional[AnyStr] = None, stat_next: Optional[AnyStr] = None
|
||||
):
|
||||
self.stat_path = stat_path
|
||||
self.stat_next = stat_next
|
||||
self.stat_saved = None
|
||||
self.find = False
|
||||
|
||||
def init(self, stat_path: AnyStr, stat_next: AnyStr):
|
||||
self.stat_path = stat_path
|
||||
self.stat_next = stat_next
|
||||
self.stat_saved = self.status()
|
||||
if not self.find:
|
||||
self.find = not self.stat_saved or not self.need_skip()
|
||||
|
||||
def need_skip(self) -> bool:
|
||||
if self.find:
|
||||
return False
|
||||
return self.stat_saved and self.stat_next != self.stat_saved
|
||||
|
||||
def update_status(self):
|
||||
with open(self.stat_path, "w") as status:
|
||||
status.write(self.stat_next)
|
||||
|
||||
def finish(self):
|
||||
with open(self.stat_path, "w") as status:
|
||||
status.write("finish")
|
||||
|
||||
def is_finished(self):
|
||||
return self.status() == "finish"
|
||||
|
||||
def status(self):
|
||||
try:
|
||||
with open(self.stat_path) as status:
|
||||
return status.read()
|
||||
except IOError:
|
||||
return None
|
||||
453
tools/python/maps_generator/generator/steps.py
Normal file
453
tools/python/maps_generator/generator/steps.py
Normal file
|
|
@ -0,0 +1,453 @@
|
|||
"""
|
||||
This file contains the basic API around generator_tool and the OSM tools used to generate maps.
|
||||
"""
|
||||
import functools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from typing import AnyStr
|
||||
|
||||
from maps_generator.generator import settings
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.generator.env import PathProvider
|
||||
from maps_generator.generator.env import WORLDS_NAMES
|
||||
from maps_generator.generator.env import WORLD_NAME
|
||||
from maps_generator.generator.env import get_all_countries_list
|
||||
from maps_generator.generator.exceptions import ValidationError
|
||||
from maps_generator.generator.exceptions import wait_and_raise_if_fail
|
||||
from maps_generator.generator.gen_tool import run_gen_tool
|
||||
from maps_generator.generator.osmtools import osmconvert
|
||||
from maps_generator.generator.osmtools import osmfilter
|
||||
from maps_generator.generator.osmtools import osmupdate
|
||||
from maps_generator.generator.statistics import make_stats
|
||||
from maps_generator.utils.file import download_files
|
||||
from maps_generator.utils.file import is_verified
|
||||
from maps_generator.utils.file import make_symlink
|
||||
from maps_generator.utils.md5 import md5_ext
|
||||
from maps_generator.utils.md5 import write_md5sum
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
||||
|
||||
def multithread_run_if_one_country(func):
|
||||
@functools.wraps(func)
|
||||
def wrap(env, country, **kwargs):
|
||||
if len(env.countries) == 1:
|
||||
kwargs.update({"threads_count": settings.THREADS_COUNT})
|
||||
# Otherwise index stage of Taiwan_* mwms continues to run after all other mwms have finished:
|
||||
elif country == 'Taiwan_North':
|
||||
kwargs.update({"threads_count": 6})
|
||||
elif country == 'Taiwan_South':
|
||||
kwargs.update({"threads_count": 2})
|
||||
func(env, country, **kwargs)
|
||||
|
||||
return wrap
|
||||
|
||||
|
||||
def convert_planet(
|
||||
tool: AnyStr,
|
||||
in_planet: AnyStr,
|
||||
out_planet: AnyStr,
|
||||
output=subprocess.DEVNULL,
|
||||
error=subprocess.DEVNULL,
|
||||
):
|
||||
osmconvert(tool, in_planet, out_planet, output=output, error=error)
|
||||
write_md5sum(out_planet, md5_ext(out_planet))
|
||||
|
||||
|
||||
def step_download_and_convert_planet(env: Env, force_download: bool, **kwargs):
|
||||
# Do not copy, convert, or verify a local .o5m planet dump; just symlink it instead.
|
||||
src = settings.PLANET_URL
|
||||
if src.startswith("file://") and src.endswith(".o5m"):
|
||||
os.symlink(src[7:], env.paths.planet_o5m)
|
||||
return
|
||||
|
||||
if force_download or not is_verified(env.paths.planet_osm_pbf):
|
||||
download_files(
|
||||
{
|
||||
settings.PLANET_URL: env.paths.planet_osm_pbf,
|
||||
settings.PLANET_MD5_URL: md5_ext(env.paths.planet_osm_pbf),
|
||||
},
|
||||
env.force_download_files,
|
||||
)
|
||||
|
||||
if not is_verified(env.paths.planet_osm_pbf):
|
||||
raise ValidationError(f"Wrong md5 sum for {env.paths.planet_osm_pbf}.")
|
||||
|
||||
convert_planet(
|
||||
env[settings.OSM_TOOL_CONVERT],
|
||||
env.paths.planet_osm_pbf,
|
||||
env.paths.planet_o5m,
|
||||
output=env.get_subprocess_out(),
|
||||
error=env.get_subprocess_out(),
|
||||
)
|
||||
|
||||
os.remove(env.paths.planet_osm_pbf)
|
||||
os.remove(md5_ext(env.paths.planet_osm_pbf))
|
||||
|
||||
|
||||
def step_update_planet(env: Env, **kwargs):
|
||||
tmp = f"{env.paths.planet_o5m}.tmp"
|
||||
osmupdate(
|
||||
env[settings.OSM_TOOL_UPDATE],
|
||||
env.paths.planet_o5m,
|
||||
tmp,
|
||||
output=env.get_subprocess_out(),
|
||||
error=env.get_subprocess_out(),
|
||||
**kwargs,
|
||||
)
|
||||
os.remove(env.paths.planet_o5m)
|
||||
os.rename(tmp, env.paths.planet_o5m)
|
||||
write_md5sum(env.paths.planet_o5m, md5_ext(env.paths.planet_o5m))
|
||||
|
||||
|
||||
def step_preprocess(env: Env, **kwargs):
|
||||
run_gen_tool(
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(),
|
||||
err=env.get_subprocess_out(),
|
||||
data_path=env.paths.data_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
osm_file_type="o5m",
|
||||
osm_file_name=env.paths.planet_o5m,
|
||||
node_storage=env.node_storage,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
preprocess=True,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_features(env: Env, **kwargs):
|
||||
if any(x not in WORLDS_NAMES for x in env.countries):
|
||||
kwargs.update({"generate_packed_borders": True})
|
||||
if any(x == WORLD_NAME for x in env.countries):
|
||||
kwargs.update({"generate_world": True})
|
||||
if len(env.countries) == len(get_all_countries_list(PathProvider.borders_path())):
|
||||
kwargs.update({"have_borders_for_whole_world": True})
|
||||
|
||||
run_gen_tool(
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(),
|
||||
err=env.get_subprocess_out(),
|
||||
data_path=env.paths.data_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
osm_file_type="o5m",
|
||||
osm_file_name=env.paths.planet_o5m,
|
||||
node_storage=env.node_storage,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
cities_boundaries_data=env.paths.cities_boundaries_path,
|
||||
generate_features=True,
|
||||
threads_count=settings.THREADS_COUNT_FEATURES_STAGE,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def run_gen_tool_with_recovery_country(env: Env, *args, **kwargs):
|
||||
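# Recovery: the existing <country>.mwm is copied into the draft directory (its
# .osm2ft is symlinked there), the tool runs against the draft copy, and the result
# is moved back afterwards, so a failed run does not corrupt the original mwm.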
if "data_path" not in kwargs or "output" not in kwargs:
|
||||
logger.warning("The call run_gen_tool() will be without recovery.")
|
||||
return run_gen_tool(*args, **kwargs)
|
||||
|
||||
prev_data_path = kwargs["data_path"]
|
||||
mwm = f"{kwargs['output']}.mwm"
|
||||
osm2ft = f"{mwm}.osm2ft"
|
||||
kwargs["data_path"] = env.paths.draft_path
|
||||
make_symlink(
|
||||
os.path.join(prev_data_path, osm2ft), os.path.join(env.paths.draft_path, osm2ft)
|
||||
)
|
||||
shutil.copy(
|
||||
os.path.join(prev_data_path, mwm), os.path.join(env.paths.draft_path, mwm)
|
||||
)
|
||||
run_gen_tool(*args, **kwargs)
|
||||
shutil.move(
|
||||
os.path.join(env.paths.draft_path, mwm), os.path.join(prev_data_path, mwm)
|
||||
)
|
||||
kwargs["data_path"] = prev_data_path
|
||||
|
||||
|
||||
@multithread_run_if_one_country
|
||||
def _generate_common_index(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool(
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
node_storage=env.node_storage,
|
||||
planet_version=env.planet_version,
|
||||
generate_geometry=True,
|
||||
generate_index=True,
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_index_world(env: Env, country: AnyStr, **kwargs):
|
||||
_generate_common_index(
|
||||
env,
|
||||
country,
|
||||
generate_search_index=True,
|
||||
cities_boundaries_data=env.paths.cities_boundaries_path,
|
||||
generate_cities_boundaries=True,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_cities_ids_world(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
output=country,
|
||||
generate_cities_ids=True,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def filter_roads(
|
||||
name_executable,
|
||||
in_file,
|
||||
out_file,
|
||||
output=subprocess.DEVNULL,
|
||||
error=subprocess.DEVNULL,
|
||||
):
|
||||
osmfilter(
|
||||
name_executable,
|
||||
in_file,
|
||||
out_file,
|
||||
output=output,
|
||||
error=error,
|
||||
keep="",
|
||||
keep_ways="highway=motorway =trunk =primary =secondary =tertiary",
|
||||
)
|
||||
|
||||
|
||||
def make_world_road_graph(
|
||||
name_executable,
|
||||
path_roads_file,
|
||||
path_resources,
|
||||
path_res_file,
|
||||
logger,
|
||||
output=subprocess.DEVNULL,
|
||||
error=subprocess.DEVNULL,
|
||||
):
|
||||
world_roads_builder_tool_cmd = [
|
||||
name_executable,
|
||||
f"--path_roads_file={path_roads_file}",
|
||||
f"--path_resources={path_resources}",
|
||||
f"--path_res_file={path_res_file}",
|
||||
]
|
||||
logger.info(f"Starting {' '.join(world_roads_builder_tool_cmd)}")
|
||||
world_roads_builder_tool = subprocess.Popen(
|
||||
world_roads_builder_tool_cmd, stdout=output, stderr=error, env=os.environ
|
||||
)
|
||||
|
||||
wait_and_raise_if_fail(world_roads_builder_tool)
|
||||
|
||||
|
||||
def step_prepare_routing_world(env: Env, country: AnyStr, logger, **kwargs):
|
||||
filter_roads(
|
||||
env[settings.OSM_TOOL_FILTER],
|
||||
env.paths.planet_o5m,
|
||||
env.paths.world_roads_o5m,
|
||||
env.get_subprocess_out(country),
|
||||
env.get_subprocess_out(country),
|
||||
)
|
||||
make_world_road_graph(
|
||||
env.world_roads_builder_tool,
|
||||
env.paths.world_roads_o5m,
|
||||
env.paths.user_resource_path,
|
||||
env.paths.world_roads_path,
|
||||
logger,
|
||||
env.get_subprocess_out(country),
|
||||
env.get_subprocess_out(country)
|
||||
)
|
||||
|
||||
|
||||
def step_routing_world(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
output=country,
|
||||
world_roads_path=env.paths.world_roads_path,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_index(env: Env, country: AnyStr, **kwargs):
|
||||
_generate_common_index(env, country, generate_search_index=True, **kwargs)
|
||||
|
||||
|
||||
def step_coastline_index(env: Env, country: AnyStr, **kwargs):
|
||||
_generate_common_index(env, country, **kwargs)
|
||||
|
||||
|
||||
def step_ugc(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
ugc_data=env.paths.ugc_path,
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_popularity(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
generate_popular_places=True,
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def step_popularity_world(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
wikipedia_pages=env.paths.descriptions_path,
|
||||
idToWikidata=env.paths.id_to_wikidata_path,
|
||||
generate_popular_places=True,
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_srtm(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
srtm_path=env.paths.srtm_path(),
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_isolines_info(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
generate_isolines_info=True,
|
||||
isolines_path=PathProvider.isolines_path(),
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_description(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
wikipedia_pages=env.paths.descriptions_path,
|
||||
idToWikidata=env.paths.id_to_wikidata_path,
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_routing(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
cities_boundaries_data=env.paths.cities_boundaries_path,
|
||||
generate_maxspeed=True,
|
||||
make_city_roads=True,
|
||||
make_cross_mwm=True,
|
||||
generate_cameras=True,
|
||||
make_routing_index=True,
|
||||
generate_traffic_keys=False,
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_routing_transit(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
transit_path=env.paths.transit_path,
|
||||
transit_path_experimental=env.paths.transit_path_experimental,
|
||||
make_transit_cross_mwm=True,
|
||||
make_transit_cross_mwm_experimental=bool(env.paths.transit_path_experimental),
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def step_statistics(env: Env, country: AnyStr, **kwargs):
|
||||
run_gen_tool_with_recovery_country(
|
||||
env,
|
||||
env.gen_tool,
|
||||
out=env.get_subprocess_out(country),
|
||||
err=env.get_subprocess_out(country),
|
||||
data_path=env.paths.mwm_path,
|
||||
intermediate_data_path=env.paths.intermediate_data_path,
|
||||
cache_path=env.paths.cache_path,
|
||||
user_resource_path=env.paths.user_resource_path,
|
||||
stats_types=True,
|
||||
output=country,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
with open(os.path.join(env.paths.stats_path, f"{country}.json"), "w") as f:
|
||||
json.dump(
|
||||
make_stats(
|
||||
settings.STATS_TYPES_CONFIG,
|
||||
os.path.join(env.paths.intermediate_data_path, f"{country}.stats"),
|
||||
),
|
||||
f,
|
||||
)
|
||||
55
tools/python/maps_generator/maps_generator.py
Normal file
55
tools/python/maps_generator/maps_generator.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
import logging
|
||||
from typing import AnyStr
|
||||
from typing import Iterable
|
||||
from typing import Optional
|
||||
|
||||
from maps_generator.generator import stages_declaration as sd
|
||||
from maps_generator.generator.env import Env
|
||||
from maps_generator.generator.generation import Generation
|
||||
from .generator.stages import Stage
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
||||
|
||||
def run_generation(
|
||||
env: Env,
|
||||
stages: Iterable[Stage],
|
||||
from_stage: Optional[AnyStr] = None,
|
||||
build_lock: bool = True,
|
||||
):
|
||||
generation = Generation(env, build_lock)
|
||||
for s in stages:
|
||||
generation.add_stage(s)
|
||||
|
||||
generation.run(from_stage)
|
||||
|
||||
|
||||
def generate_maps(env: Env, from_stage: Optional[AnyStr] = None):
|
||||
""""Runs maps generation."""
|
||||
stages = (
|
||||
sd.StageDownloadAndConvertPlanet(),
|
||||
sd.StageUpdatePlanet(),
|
||||
sd.StageCoastline(),
|
||||
sd.StagePreprocess(),
|
||||
sd.StageFeatures(),
|
||||
sd.StageDownloadDescriptions(),
|
||||
sd.StageMwm(),
|
||||
sd.StageCountriesTxt(),
|
||||
sd.StageLocalAds(),
|
||||
sd.StageStatistics(),
|
||||
sd.StageCleanup(),
|
||||
)
|
||||
|
||||
run_generation(env, stages, from_stage)
|
||||
|
||||
|
||||
def generate_coasts(env: Env, from_stage: Optional[AnyStr] = None):
|
||||
"""Runs coasts generation."""
|
||||
stages = (
|
||||
sd.StageDownloadAndConvertPlanet(),
|
||||
sd.StageUpdatePlanet(),
|
||||
sd.StageCoastline(use_old_if_fail=False),
|
||||
sd.StageCleanup(),
|
||||
)
|
||||
|
||||
run_generation(env, stages, from_stage)
|
||||
8
tools/python/maps_generator/requirements.txt
Normal file
8
tools/python/maps_generator/requirements.txt
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
omim-data-all
|
||||
omim-data-files
|
||||
omim-descriptions
|
||||
omim-post_generation
|
||||
filelock==3.0.10
|
||||
beautifulsoup4==4.9.1
|
||||
requests>=2.31.0
|
||||
requests_file==1.5.1
|
||||
6
tools/python/maps_generator/requirements_dev.txt
Normal file
6
tools/python/maps_generator/requirements_dev.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
-r ../post_generation/requirements_dev.txt
|
||||
-r ../descriptions/requirements_dev.txt
|
||||
filelock==3.0.10
|
||||
beautifulsoup4==4.9.1
|
||||
requests>=2.31.0
|
||||
requests_file==1.5.1
|
||||
37
tools/python/maps_generator/setup.py
Executable file
37
tools/python/maps_generator/setup.py
Executable file
|
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
|
||||
import setuptools
|
||||
|
||||
module_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(module_dir, "..", "..", ".."))
|
||||
|
||||
from pyhelpers.setup import chdir
|
||||
from pyhelpers.setup import get_version
|
||||
from pyhelpers.setup import get_requirements
|
||||
|
||||
|
||||
with chdir(os.path.abspath(os.path.dirname(__file__))):
|
||||
setuptools.setup(
|
||||
name="omim-maps_generator",
|
||||
version=str(get_version()),
|
||||
author="CoMaps",
|
||||
author_email="info@comaps.app",
|
||||
description="This package contains tools for maps generation.",
|
||||
url="https://codeberg.org/comaps",
|
||||
package_dir={"maps_generator": ""},
|
||||
package_data={"": ["var/**/*"]},
|
||||
packages=[
|
||||
"maps_generator",
|
||||
"maps_generator.generator",
|
||||
"maps_generator.utils",
|
||||
"maps_generator.checks"
|
||||
],
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
],
|
||||
python_requires=">=3.6",
|
||||
install_requires=get_requirements(),
|
||||
)
|
||||
0
tools/python/maps_generator/tests/__init__.py
Normal file
0
tools/python/maps_generator/tests/__init__.py
Normal file
131
tools/python/maps_generator/tests/test_logs_reader.py
Normal file
131
tools/python/maps_generator/tests/test_logs_reader.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
import unittest
|
||||
from collections import Counter
|
||||
|
||||
from maps_generator.checks.logs import logs_reader
|
||||
|
||||
|
||||
class TestLogsReader(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.dir = tempfile.TemporaryDirectory()
|
||||
with open(
|
||||
os.path.join(self.dir.name, "Czech_Jihovychod_Jihomoravsky kraj.log"), "w"
|
||||
) as file:
|
||||
file.write(LOG_STRING)
|
||||
|
||||
logs = list(logs_reader.LogsReader(self.dir.name))
|
||||
self.assertEqual(len(logs), 1)
|
||||
self.log = logs[0]
|
||||
|
||||
def tearDown(self):
|
||||
self.dir.cleanup()
|
||||
|
||||
def test_read_logs(self):
|
||||
self.assertTrue(self.log.name.startswith("Czech_Jihovychod_Jihomoravsky kraj"))
|
||||
self.assertTrue(self.log.is_mwm_log)
|
||||
self.assertFalse(self.log.is_stage_log)
|
||||
self.assertEqual(len(self.log.lines), 46)
|
||||
|
||||
def test_split_into_stages(self):
|
||||
st = logs_reader.split_into_stages(self.log)
|
||||
self.assertEqual(len(st), 4)
|
||||
names_counter = Counter(s.name for s in st)
|
||||
self.assertEqual(
|
||||
names_counter,
|
||||
Counter({"Routing": 1, "RoutingTransit": 1, "MwmStatistics": 2}),
|
||||
)
|
||||
|
||||
def test_split_and_normalize_logs(self):
|
||||
st = logs_reader.normalize_logs(logs_reader.split_into_stages(self.log))
|
||||
self.assertEqual(len(st), 3)
|
||||
m = {s.name: s for s in st}
|
||||
self.assertEqual(
|
||||
m["MwmStatistics"].duration, datetime.timedelta(seconds=3.628742)
|
||||
)
|
||||
|
||||
def test_count_levels(self):
|
||||
st = logs_reader.normalize_logs(logs_reader.split_into_stages(self.log))
|
||||
self.assertEqual(len(st), 3)
|
||||
m = {s.name: s for s in st}
|
||||
c = logs_reader.count_levels(m["Routing"])
|
||||
self.assertEqual(c, Counter({logging.INFO: 22, logging.ERROR: 1}))
|
||||
|
||||
c = logs_reader.count_levels(self.log.lines)
|
||||
self.assertEqual(c, Counter({logging.INFO: 45, logging.ERROR: 1}))
|
||||
|
||||
def test_find_and_parse(self):
|
||||
st = logs_reader.normalize_logs(logs_reader.split_into_stages(self.log))
|
||||
self.assertEqual(len(st), 3)
|
||||
m = {s.name: s for s in st}
|
||||
pattern_str = (
|
||||
r".*Leaps finished, elapsed: [0-9.]+ seconds, routes found: "
|
||||
r"(?P<routes_found>\d+) , not found: (?P<routes_not_found>\d+)$"
|
||||
)
|
||||
for found in (
|
||||
logs_reader.find_and_parse(m["Routing"], pattern_str),
|
||||
logs_reader.find_and_parse(self.log.lines, re.compile(pattern_str)),
|
||||
):
|
||||
|
||||
self.assertEqual(len(found), 1)
|
||||
line = found[0]
|
||||
self.assertEqual(
|
||||
line[0], {"routes_found": "996363", "routes_not_found": "126519"}
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "main":
|
||||
unittest.main()
|
||||
|
||||
|
||||
LOG_STRING = """
|
||||
[2020-05-24 04:19:37,032] INFO stages Stage Routing: start ...
|
||||
[2020-05-24 04:19:37,137] INFO gen_tool Run generator tool [generator_tool version 1590177464 f52c6496c4d90440f2e0d8088acdb3350dcf7c69]: /home/Projects/build-omim-Desktop_Qt_5_10_1_GCC_64bit-Release/generator_tool --threads_count=1 --data_path=/home/maps_build/2020_05_23__16_58_17/draft --intermediate_data_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --user_resource_path=/home/Projects/omim/data --cities_boundaries_data=/home/maps_build/2020_05_23__16_58_17/intermediate_data/cities_boundaries.bin --generate_maxspeed=true --make_city_roads=true --make_cross_mwm=true --generate_cameras=true --make_routing_index=true --generate_traffic_keys=true --output=Czech_Jihovychod_Jihomoravsky kraj
|
||||
LOG TID(1) INFO 3.29e-06 Loaded countries list for version: 200402
|
||||
LOG TID(1) INFO 7.945e-05 generator/camera_info_collector.cpp:339 BuildCamerasInfo() Generating cameras info for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
|
||||
LOG TID(1) INFO 0.529856 generator/routing_index_generator.cpp:546 BuildRoutingIndex() Building routing index for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
|
||||
LOG TID(1) INFO 2.11074 generator/routing_index_generator.cpp:563 BuildRoutingIndex() Routing section created: 639872 bytes, 163251 roads, 193213 joints, 429334 points
|
||||
LOG TID(1) INFO 2.90872 generator/restriction_generator.cpp:117 SerializeRestrictions() Routing restriction info: RestrictionHeader: { No => 430, Only => 284, NoUTurn => 123, OnlyUTurn => 0 }
|
||||
LOG TID(1) INFO 3.00342 generator/road_access_generator.cpp:799 BuildRoadAccessInfo() Generating road access info for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
|
||||
LOG TID(1) INFO 3.77435 generator_tool/generator_tool.cpp:621 operator()() Generating cities boundaries roads for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
|
||||
LOG TID(1) INFO 3.85993 generator/city_roads_generator.cpp:51 LoadCitiesBoundariesGeometry() Read: 14225 boundaries from: /home/maps_build/2020_05_23__16_58_17/intermediate_data/routing_city_boundaries.bin
|
||||
LOG TID(1) INFO 6.82577 routing/city_roads_serialization.hpp:78 Serialize() Serialized 81697 road feature ids in cities. Size: 77872 bytes.
|
||||
LOG TID(1) INFO 6.82611 generator_tool/generator_tool.cpp:621 operator()() Generating maxspeeds section for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
|
||||
LOG TID(1) INFO 6.82616 generator/maxspeeds_builder.cpp:186 BuildMaxspeedsSection() BuildMaxspeedsSection( /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm , /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm.osm2ft , /home/maps_build/2020_05_23__16_58_17/intermediate_data/maxspeeds.csv )
|
||||
LOG TID(1) INFO 7.58621 routing/maxspeeds_serialization.hpp:144 Serialize() Serialized 11413 forward maxspeeds and 302 bidirectional maxspeeds. Section size: 17492 bytes.
|
||||
LOG TID(1) INFO 7.58623 generator/maxspeeds_builder.cpp:172 SerializeMaxspeeds() SerializeMaxspeeds( /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm , ...) serialized: 11715 maxspeed tags.
|
||||
LOG TID(1) INFO 7.64526 generator/routing_index_generator.cpp:596 BuildRoutingCrossMwmSection() Building cross mwm section for Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 8.43521 generator/routing_index_generator.cpp:393 CalcCrossMwmConnectors() Transitions finished, transitions: 1246 , elapsed: 0.789908 seconds
LOG TID(1) INFO 8.48956 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Pedestrian model. Number of enters: 1233 Number of exits: 1233
LOG TID(1) INFO 8.48964 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Bicycle model. Number of enters: 1231 Number of exits: 1230
LOG TID(1) INFO 8.48964 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Car model. Number of enters: 1089 Number of exits: 1089
LOG TID(1) INFO 8.48965 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Transit model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 4241.68 generator/routing_index_generator.cpp:537 FillWeights() Leaps finished, elapsed: 4233.19 seconds, routes found: 996363 , not found: 126519
LOG TID(1) INFO 4241.8 generator/routing_index_generator.cpp:588 SerializeCrossMwm() Cross mwm section generated, size: 1784214 bytes
LOG TID(1) ERROR 4243.2 generator/routing_index_generator.cpp:588 SerializeCrossMwm() Fake error.
[2020-05-24 05:30:19,319] INFO stages Stage Routing: finished in 1:10:42.287364
[2020-05-24 05:30:19,319] INFO stages Stage RoutingTransit: start ...
[2020-05-24 05:30:19,485] INFO gen_tool Run generator tool [generator_tool version 1590177464 f52c6496c4d90440f2e0d8088acdb3350dcf7c69]: /home/Projects/build-omim-Desktop_Qt_5_10_1_GCC_64bit-Release/generator_tool --threads_count=1 --data_path=/home/maps_build/2020_05_23__16_58_17/draft --intermediate_data_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --user_resource_path=/home/Projects/omim/data --transit_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --make_transit_cross_mwm=true --output=Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 3.107e-06 Loaded countries list for version: 200402
LOG TID(1) INFO 6.0315e-05 generator/transit_generator.cpp:205 BuildTransit() Building transit section for Czech_Jihovychod_Jihomoravsky kraj mwmDir: /home/maps_build/2020_05_23__16_58_17/draft/
LOG TID(1) INFO 5.40151 generator/routing_index_generator.cpp:617 BuildTransitCrossMwmSection() Building transit cross mwm section for Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 5.47317 generator/routing_index_generator.cpp:320 CalcCrossMwmTransitions() Transit cross mwm section is not generated because no transit section in mwm: /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
LOG TID(1) INFO 5.4732 generator/routing_index_generator.cpp:393 CalcCrossMwmConnectors() Transitions finished, transitions: 0 , elapsed: 0.0716537 seconds
LOG TID(1) INFO 5.47321 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Pedestrian model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 5.47321 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Bicycle model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 5.47322 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Car model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 5.47322 generator/routing_index_generator.cpp:411 CalcCrossMwmConnectors() Transit model. Number of enters: 0 Number of exits: 0
LOG TID(1) INFO 5.47325 generator/routing_index_generator.cpp:588 SerializeCrossMwm() Cross mwm section generated, size: 31 bytes
[2020-05-24 05:30:25,144] INFO stages Stage RoutingTransit: finished in 0:00:05.824967
[2020-05-24 05:30:25,144] INFO stages Stage MwmStatistics: start ...
[2020-05-24 05:30:25,212] INFO gen_tool Run generator tool [generator_tool version 1590177464 f52c6496c4d90440f2e0d8088acdb3350dcf7c69]: /home/Projects/build-omim-Desktop_Qt_5_10_1_GCC_64bit-Release/generator_tool --threads_count=1 --data_path=/home/maps_build/2020_05_23__16_58_17/draft --intermediate_data_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --user_resource_path=/home/Projects/omim/data --stats_types=true --output=Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 1.5806e-05 generator_tool/generator_tool.cpp:621 operator()() Calculating type statistics for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
[2020-05-24 05:30:28,773] INFO stages Stage MwmStatistics: finished in 0:00:03.628742
[2020-05-24 06:30:25,144] INFO stages Stage MwmStatistics: start ...
[2020-05-24 06:30:25,212] INFO gen_tool Run generator tool [generator_tool version 1590177464 f52c6496c4d90440f2e0d8088acdb3350dcf7c69]: /home/Projects/build-omim-Desktop_Qt_5_10_1_GCC_64bit-Release/generator_tool --threads_count=1 --data_path=/home/maps_build/2020_05_23__16_58_17/draft --intermediate_data_path=/home/maps_build/2020_05_23__16_58_17/intermediate_data --user_resource_path=/home/Projects/omim/data --stats_types=true --output=Czech_Jihovychod_Jihomoravsky kraj
LOG TID(1) INFO 1.5806e-05 generator_tool/generator_tool.cpp:621 operator()() Calculating type statistics for /home/maps_build/2020_05_23__16_58_17/draft/Czech_Jihovychod_Jihomoravsky kraj.mwm
[2020-05-24 06:30:28,773] INFO stages Stage MwmStatistics: finished in 0:00:01.628742
"""
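The stage lines in the fixture above (e.g. "Stage Routing: finished in 1:10:42.287364") are exactly what the generation-order script below totals per mwm. A minimal, self-contained sketch of pulling those durations out of such text follows; the regex and helper are illustrative only and are not the logs_reader implementation added in this commit.

import re
from datetime import timedelta

# Illustrative helper, not part of this commit: extracts
# "Stage <name>: finished in H:MM:SS[.ffffff]" durations from a
# maps_generator log excerpt like the fixture above.
STAGE_FINISHED = re.compile(
    r"Stage (?P<name>\w+): finished in "
    r"(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+(?:\.\d+)?)"
)


def stage_durations(log_text):
    for m in STAGE_FINISHED.finditer(log_text):
        yield m.group("name"), timedelta(
            hours=int(m.group("hours")),
            minutes=int(m.group("minutes")),
            seconds=float(m.group("seconds")),
        )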
44
tools/python/maps_generator/update_generation_order.py
Normal file
44
tools/python/maps_generator/update_generation_order.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import argparse
from multiprocessing.pool import ThreadPool
from typing import Tuple

from maps_generator.checks.logs import logs_reader


def get_args():
    parser = argparse.ArgumentParser(
        description="This script generates file with countries that are "
        "ordered by time needed to generate them."
    )
    parser.add_argument(
        "--output", type=str, required=True, help="Path to output file.",
    )
    parser.add_argument(
        "--logs", type=str, required=True, help="Path to logs directory.",
    )
    return parser.parse_args()


def process_log(log: logs_reader.Log) -> Tuple[str, float]:
    stage_logs = logs_reader.split_into_stages(log)
    stage_logs = logs_reader.normalize_logs(stage_logs)
    d = sum(s.duration.total_seconds() for s in stage_logs if s.duration is not None)
    return log.name, d


def main():
    args = get_args()
    with ThreadPool() as pool:
        order = pool.map(
            process_log,
            (log for log in logs_reader.LogsReader(args.logs) if log.is_mwm_log),
        )

    order.sort(key=lambda v: v[1], reverse=True)
    with open(args.output, "w") as out:
        out.write("# Mwm name\tGeneration time\n")
        out.writelines("{}\t{}\n".format(*line) for line in order)


if __name__ == "__main__":
    main()
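The script above writes one tab-separated "<mwm name>\t<total seconds>" line per map, longest first, after a "# Mwm name\tGeneration time" header, and is invoked with the --logs and --output arguments it defines (for example: python update_generation_order.py --logs <logs dir> --output countries_order.txt). A minimal sketch of reading that file back; the path is a placeholder, not a value from this commit:

# Illustrative reader for the file produced by update_generation_order.py above;
# "countries_order.txt" is a placeholder path.
def read_generation_order(path="countries_order.txt"):
    order = []
    with open(path) as f:
        for line in f:
            if line.startswith("#") or not line.strip():
                continue
            name, seconds = line.rstrip("\n").split("\t")
            order.append((name, float(seconds)))
    return order  # already sorted longest-first by the script above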
0
tools/python/maps_generator/utils/__init__.py
Normal file
0
tools/python/maps_generator/utils/__init__.py
Normal file
19
tools/python/maps_generator/utils/algo.py
Normal file
19
tools/python/maps_generator/utils/algo.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
import re
from datetime import timedelta


DURATION_PATTERN = re.compile(
    r"((?P<days>[-\d]+) day[s]*, )?(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)"
)


def unique(s):
    seen = set()
    seen_add = seen.add
    return [x for x in s if not (x in seen or seen_add(x))]


def parse_timedelta(s):
    m = DURATION_PATTERN.match(s)
    d = m.groupdict()
    return timedelta(**{k: float(d[k]) for k in d if d[k] is not None})
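A quick, illustrative check of the two helpers above. The first duration matches the "finished in 1:10:42.287364" stage line from the log fixture earlier in this diff; the "2 days, ..." value is a hypothetical input chosen to exercise the optional days group of DURATION_PATTERN.

from datetime import timedelta

from maps_generator.utils.algo import parse_timedelta, unique

# "H:MM:SS.ffffff" form, as produced by the stages logger.
assert parse_timedelta("1:10:42.287364") == timedelta(hours=1, minutes=10, seconds=42.287364)
# Optional "N days, " prefix is also accepted.
assert parse_timedelta("2 days, 0:00:05.824967").days == 2
# unique() keeps the first occurrence and preserves order.
assert unique(["a", "b", "a", "c"]) == ["a", "b", "c"]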
201
tools/python/maps_generator/utils/file.py
Normal file
201
tools/python/maps_generator/utils/file.py
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
import errno
import functools
import glob
import logging
import os
import shutil
from functools import partial
from multiprocessing.pool import ThreadPool
from typing import AnyStr
from typing import Dict
from typing import List
from typing import Optional
from urllib.parse import unquote
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.request import url2pathname

import requests
from bs4 import BeautifulSoup
from requests_file import FileAdapter

from maps_generator.utils.md5 import check_md5
from maps_generator.utils.md5 import md5_ext

logger = logging.getLogger("maps_generator")


def is_file_uri(url: AnyStr) -> bool:
    return urlparse(url).scheme == "file"


def file_uri_to_path(url: AnyStr) -> AnyStr:
    file_uri = urlparse(url)
    file_path = file_uri.path

    # URI is something like "file://~/..."
    if file_uri.netloc == '~':
        file_path = f'~{file_uri.path}'
        return os.path.expanduser(file_path)

    return file_path


def is_executable(fpath: AnyStr) -> bool:
    return fpath is not None and os.path.isfile(fpath) and os.access(fpath, os.X_OK)


@functools.lru_cache()
def find_executable(path: AnyStr, exe: Optional[AnyStr] = None) -> AnyStr:
    if exe is None:
        if is_executable(path):
            return path
        else:
            raise FileNotFoundError(path)
    find_pattern = f"{path}/**/{exe}"
    for name in glob.iglob(find_pattern, recursive=True):
        if is_executable(name):
            return name
    raise FileNotFoundError(f"{exe} not found in {path}")

def download_file(url: AnyStr, name: AnyStr, download_if_exists: bool = True):
    logger.info(f"Trying to download {name} from {url}.")
    if not download_if_exists and os.path.exists(name):
        logger.info(f"File {name} already exists.")
        return

    if is_file_uri(url):
        # url uses 'file://' scheme
        shutil.copy2(file_uri_to_path(url), name)
        logger.info(f"File {name} was copied from {url}.")
        return

    tmp_name = f"{name}__"
    os.makedirs(os.path.dirname(tmp_name), exist_ok=True)
    with requests.Session() as session:
        session.mount("file://", FileAdapter())
        with open(tmp_name, "wb") as handle:
            response = session.get(url, stream=True)
            file_length = None
            try:
                file_length = int(response.headers["Content-Length"])
            except KeyError:
                logger.warning(
                    f"There is no attribute Content-Length in headers [{url}]: {response.headers}"
                )

            current = 0
            max_attempts = 32
            attempts = max_attempts
            while attempts:
                for data in response.iter_content(chunk_size=4096):
                    current += len(data)
                    handle.write(data)

                if file_length is None or file_length == current:
                    break

                logger.warning(
                    f"Download interrupted. Resuming download from {url}: {current}/{file_length}."
                )
                headers = {"Range": f"bytes={current}-"}
                response = session.get(url, headers=headers, stream=True)
                attempts -= 1

            assert (
                attempts > 0
            ), f"Maximum failed resuming download attempts of {max_attempts} is exceeded."

    shutil.move(tmp_name, name)
    logger.info(f"File {name} was downloaded from {url}.")

def is_dir(url) -> bool:
    return url.endswith("/")


def find_files(url) -> List[AnyStr]:
    def files_list_file_scheme(path, results=None):
        if results is None:
            results = []

        for p in os.listdir(path):
            new_path = os.path.join(path, p)
            if os.path.isdir(new_path):
                files_list_file_scheme(new_path, results)
            else:
                results.append(new_path)
        return results

    def files_list_http_scheme(url, results=None):
        if results is None:
            results = []

        page = requests.get(url).content
        bs = BeautifulSoup(page, "html.parser")
        links = bs.findAll("a", href=True)
        for link in links:
            href = link["href"]
            if href == "./" or href == "../":
                continue

            new_url = urljoin(url, href)
            if is_dir(new_url):
                files_list_http_scheme(new_url, results)
            else:
                results.append(new_url)
        return results

    parse_result = urlparse(url)
    if parse_result.scheme == "file":
        return [
            f.replace(parse_result.path, "")
            for f in files_list_file_scheme(parse_result.path)
        ]
    if parse_result.scheme == "http" or parse_result.scheme == "https":
        return [f.replace(url, "") for f in files_list_http_scheme(url)]

    assert False, parse_result

def normalize_url_to_path_dict(
    url_to_path: Dict[AnyStr, AnyStr]
) -> Dict[AnyStr, AnyStr]:
    for url in list(url_to_path.keys()):
        if is_dir(url):
            path = url_to_path[url]
            del url_to_path[url]
            for rel_path in find_files(url):
                abs_url = urljoin(url, rel_path)
                url_to_path[abs_url] = unquote(os.path.join(path, rel_path))
    return url_to_path


def download_files(url_to_path: Dict[AnyStr, AnyStr], download_if_exists: bool = True):
    with ThreadPool() as pool:
        pool.starmap(
            partial(download_file, download_if_exists=download_if_exists),
            url_to_path.items(),
        )


def is_exists_file_and_md5(name: AnyStr) -> bool:
    return os.path.isfile(name) and os.path.isfile(md5_ext(name))


def is_verified(name: AnyStr) -> bool:
    return is_exists_file_and_md5(name) and check_md5(name, md5_ext(name))


def make_symlink(target: AnyStr, link_name: AnyStr):
    try:
        os.symlink(target, link_name)
    except OSError as e:
        if e.errno == errno.EEXIST:
            if os.path.islink(link_name):
                link = os.readlink(link_name)
                if os.path.abspath(target) != os.path.abspath(link):
                    raise e
            else:
                raise e
        else:
            raise e
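A minimal usage sketch for the download helpers above. The example.com URLs and /tmp paths are placeholders introduced for illustration; only download_files, is_verified, make_symlink and md5_ext are names taken from this commit.

from maps_generator.utils.file import download_files, is_verified, make_symlink
from maps_generator.utils.md5 import md5_ext

# Placeholder locations; a real run would point at an actual planet mirror.
planet = "/tmp/planet.o5m"
download_files(
    {
        "https://example.com/planet.o5m": planet,
        "https://example.com/planet.o5m.md5": md5_ext(planet),
    },
    download_if_exists=False,
)

# is_verified() requires both the file and its md5 companion to exist and match;
# only then expose the download under a stable name.
if is_verified(planet):
    make_symlink(planet, "/tmp/planet-latest.o5m")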
Some files were not shown because too many files have changed in this diff